diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index e480697b6452440f043901140a07cb643f3cbdb6..1e4bcd1b0a498e8359e2c79519d462d43e416ce4 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -68,9 +68,34 @@ using ConvImpl2D_cpu = OperatorImpl_cpu<Conv2D_Op, void *, void *)>; +using Conv3D_Op = Conv_Op<3>; +using ConvImpl3D_cpu = OperatorImpl_cpu<Conv3D_Op, + void(const std::array<DimSize_t, 3> &, + const std::array<DimSize_t, 3> &, + const std::array<DimSize_t, 3> &, + const std::array<DimSize_t, 5> &, + const std::array<DimSize_t, 5> &, + const void *, + const void *, + const void *, + void *), + void(const std::array<DimSize_t, 3> &, + const std::array<DimSize_t, 3> &, + const std::array<DimSize_t, 3> &, + const std::array<DimSize_t, 5> &, + const std::array<DimSize_t, 5> &, + const void *, + const void *, + const void *, + void *, + void *, + void *)>; + // Implementation entry point registration to Operator REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create); REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create); +REGISTRAR(Conv3D_Op, "cpu", Aidge::ConvImpl3D_cpu::create); + } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp index d2b942f6b6f72235f5d079c0fbb402b1b4ed1373..f772ed77cb8d543cfa43df35502784cb6309a5ec 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp @@ -12,7 +12,9 @@ #ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ +#include <algorithm> #include <array> +#include <cstddef> #include <cstdint> #include "aidge/backend/cpu/operator/ConvImpl.hpp" @@ -234,7 +236,7 @@ static void conv1DBackwardWeights(const array<DimSize_t, 1> &stride, for (DimSize_t kX = 0; 
kX < kDim[0]; ++kX) { for (DimSize_t oX = 0; oX < oDims[2]; ++oX) { - const DimSize_t iX = oX * stride[0] + kX * dilation[0] ; + const DimSize_t iX = oX * stride[0] + kX * dilation[0]; weightsGrad[kOffsets[1] + kX] += input[iOffsets[1] + iX] * oGrad[oOffsets[1] + oX]; @@ -315,9 +317,9 @@ static void conv1DBackwardBias(const array<DimSize_t, 3> &oDims, * @param[inout] biasesGrad_ gradients of the kernel biases */ template <class I, class W, class B, class O> -void ConvImpl1D_cpu_backward_kernel(const array<DimSize_t,1> &stride, - const array<DimSize_t,1> &dilation, - const array<DimSize_t,1> &kernelDim, +void ConvImpl1D_cpu_backward_kernel(const array<DimSize_t, 1> &stride, + const array<DimSize_t, 1> &dilation, + const array<DimSize_t, 1> &kernelDim, const array<DimSize_t, 3> &inputDims, const array<DimSize_t, 3> &outputDims, const void *input_, @@ -1030,6 +1032,554 @@ REGISTRAR(ConvImpl2D_cpu, std::int32_t, std::int32_t, std::int32_t>}); + +/** + * @brief Forward kernel for 3D Convolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param strideDims stride dimensions + * @param dilationDims dilation dimensions + * @param kDims kernel dimensions + * @param iDims input dimensions. + * @param oDims output dimensions. + * @param input_ const input Tensor. + * @param weights_ const weight Tensor. + * @param biases_ const Biais Tensor. + * @param output_ Output Tensor. 
+ */ +template <class I, class W, class B, class O> +void ConvImpl3D_cpu_forward_kernel(const array<DimSize_t, 3> &strideDims, + const array<DimSize_t, 3> &dilationDims, + const array<DimSize_t, 3> &kDims, + const array<DimSize_t, 5> &iDims, + const array<DimSize_t, 5> &oDims, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) { + + //////////////////////////////////////////////////////////////////////// + // TENSOR CASTING + // FIXME: missing convolution attributes as arguments + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); + + // const array<DimSize_t, 3> dilatedKernelDims{ + // dilationDims[0] * kDims[0] + 1, + // dilationDims[1] * kDims[1] + 1, + // dilationDims[2] * kDims[2] + 1}; + + //////////////////////////////////////////////////////////////////////// + // strides + // for each array they represent + // the number of elems contained in a given dimension + const array<DimSize_t, 4> iStride{ + iDims[1] * iDims[2] * iDims[3] * iDims[4], + iDims[2] * iDims[3] * iDims[4], + iDims[3] * iDims[4], + iDims[4]}; + const array<DimSize_t, 4> oStride{ + oDims[1] * oDims[2] * oDims[3] * oDims[4], + oDims[2] * oDims[3] * oDims[4], + oDims[3] * oDims[4], + oDims[4]}; + const array<DimSize_t, 4> kStride{ + iDims[1] * kDims[0] * kDims[1] * kDims[2], + kDims[0] * kDims[1] * kDims[2], + kDims[1] * kDims[2], + kDims[2]}; + + //////////////////////////////////////////////////////////////////////// + // index offsets + // NOTE: + // in/out dims = {batch, in/outChannels, + // in/outDims[0],in/outDims[1],in/outDims[2]} + array<DimSize_t, 4> iOffset{0, 0, 0, 0}; + array<DimSize_t, 4> oOffset{0, 0, 0, 0}; + // NOTE: + // kernel dims = {outChannels, inChannels, kernelDims[0], + // kernelDims[1], kernelDims[2]} + array<DimSize_t, 4> kOffset{0, 0, 0, 0}; + array<DimSize_t, 2> kDilOffset{0, 0}; + + 
//////////////////////////////////////////////////////////////////////// + // COMPUTATION + for (DimSize_t batch = 0; batch < iDims[0]; ++batch) { + oOffset[0] = batch * oStride[0]; + iOffset[0] = batch * iStride[0]; + for (DimSize_t oChannel = 0; oChannel < oDims[1]; ++oChannel) { + oOffset[1] = oChannel * oStride[1] + oOffset[0]; + kOffset[0] = oChannel * kStride[0]; + + // Filling given channel with corresponding bias value + if (biases != nullptr) { + B biasVal = biases[oChannel]; + std::fill(output + oOffset[1], + output + oOffset[1] + oStride[1], + biasVal); + } + + for (DimSize_t iChannel = 0; iChannel < iDims[1]; ++iChannel) { + iOffset[1] = iChannel * iStride[1] + iOffset[0]; + kOffset[1] = iChannel * kStride[1] + kOffset[0]; + + for (DimSize_t oX = 0; oX < oDims[2]; ++oX) { + iOffset[2] = oX * strideDims[0] * iStride[2] + iOffset[1]; + oOffset[2] = oX * oStride[2] + oOffset[1]; + + for (DimSize_t oY = 0; oY < oDims[3]; ++oY) { + iOffset[3] = + oY * strideDims[1] * iStride[3] + iOffset[2]; + oOffset[3] = oY * oStride[3] + oOffset[2]; + + for (DimSize_t oZ = 0; oZ < oDims[4]; ++oZ) { + auto oIdx = oOffset[3] + oZ; + auto iIdx = iOffset[3] + oZ * strideDims[2]; + + for (DimSize_t kX = 0; kX < kDims[0]; ++kX) { + kOffset[2] = kX * kStride[2] + kOffset[1]; + kDilOffset[0] = + kX * dilationDims[0] * iStride[2]; + + for (DimSize_t kY = 0; kY < kDims[1]; ++kY) { + kOffset[3] = kY * kStride[3] + kOffset[2]; + kDilOffset[1] = + kY * dilationDims[1] * iStride[3] + + kDilOffset[0]; + + for (DimSize_t kZ = 0; kZ < kDims[2]; + ++kZ) { + output[oIdx] += + weights[kOffset[3] + kZ] * + input[iIdx + kDilOffset[1] + + kZ * dilationDims[2]]; + } + } + } + } + } + } + } + } + } +} + +/** + * @brief perform backpropagation for the input + * @note INPUT & OUTPUT convention is the same as in the + * forward function + * @note formula : + * for i in 0..input_size: + * for n in 0..weight_size: + * dL dYn dL + * ---- = ---- ---- + * dXi dXi Yn + * with : dYn / dXi = w_k + * for 
each input value
 *   for each weight
 *     for each output
 *       multiply the weight with the associated value
 * @note reminder that kernel dimensions are
 * {outChannels, inChannels, {kernelDims}}
 * <=> {oDims[1], iDims[1], kDims[0], kDims[1], kDims[2]}
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam O Output data type.
 * @param[in] stride stride parameter of the convolution operator
 * @param[in] dilation dilation parameter of the convolution operator
 * @param[in] kDims dimension of the kernel
 * @param[in] kStrides nb of elements contained per dimension of the kernel
 * @param[in] weights weights values
 * @param[in] oDims dimensions of the output
 * @param[in] oStrides nb of elements contained per dimension of the output
 * @param[in] oGrad output gradient
 * @param[in] iDims input dimensions
 * @param[in] iStrides nb of elements contained per dimension of the input
 * @param[inout] iGrad gradients of the input to update
 */
template <class I, class W, class O>
void conv3DBackwardInput(const array<DimSize_t, 3> &stride,
                         const array<DimSize_t, 3> &dilation,
                         const array<DimSize_t, 3> &kDims,
                         const array<DimSize_t, 4> &kStrides,
                         const W *weights,
                         const array<DimSize_t, 5> &oDims,
                         const array<DimSize_t, 4> &oStrides,
                         const O *oGrad,
                         const array<DimSize_t, 5> &iDims,
                         const array<DimSize_t, 4> &iStrides,
                         I *iGrad) {
    // records index offsets for each dimension that have a stride (== all
    // dimension except the last) for every parsed tensor
    // these serve as checkpoints to avoid recomputing indexes at every
    // iteration
    array<DimSize_t, 4> iOffset{};
    array<DimSize_t, 4> oOffset{};
    array<DimSize_t, 4> kOffset{};
    array<DimSize_t, 2> iDilkernelOffset{}; // input offset for dilated kernel

    // batch offsets are advanced directly in the loop header; every other
    // offset level is recomputed from its parent level below
    for (DimSize_t batch = 0; batch < iDims[0];
         ++batch, iOffset[0] += iStrides[0], oOffset[0] += oStrides[0]) {

        for (DimSize_t oChannel = 0; oChannel < oDims[1]; oChannel++) {
            oOffset[1] = oChannel * oStrides[1] + oOffset[0];
            kOffset[0] = oChannel * kStrides[0];

            for (DimSize_t iChannel = 0; iChannel < iDims[1]; ++iChannel) {
                iOffset[1] = iChannel * iStrides[1] + iOffset[0];
                kOffset[1] = iChannel * kStrides[1] + kOffset[0];

                for (DimSize_t oX = 0; oX < oDims[2]; ++oX) {
                    oOffset[2] = oX * oStrides[2] + oOffset[1];
                    // each output coord maps to input coord oX * stride
                    iOffset[2] = oX * stride[0] * iStrides[2] + iOffset[1];

                    for (DimSize_t oY = 0; oY < oDims[3]; ++oY) {
                        oOffset[3] = oY * oStrides[3] + oOffset[2];
                        iOffset[3] = oY * stride[1] * iStrides[3] + iOffset[2];

                        for (DimSize_t oZ = 0; oZ < oDims[4]; ++oZ) {
                            auto oIdx = oOffset[3] + oZ;
                            auto iIdx = iOffset[3] + oZ * stride[2];

                            // scatter oGrad[oIdx] over the dilated kernel
                            // footprint in the input gradient
                            for (DimSize_t kX = 0; kX < kDims[0]; ++kX) {
                                kOffset[2] = kX * kStrides[2] + kOffset[1];
                                iDilkernelOffset[0] =
                                    kX * dilation[0] * iStrides[2];

                                for (DimSize_t kY = 0; kY < kDims[1]; ++kY) {
                                    kOffset[3] = kY * kStrides[3] + kOffset[2];
                                    iDilkernelOffset[1] =
                                        kY * dilation[1] * iStrides[3] +
                                        iDilkernelOffset[0];

                                    for (DimSize_t kZ = 0; kZ < kDims[2];
                                         ++kZ) {

                                        iGrad[iIdx + iDilkernelOffset[1] +
                                              kZ * dilation[2]] +=
                                            weights[kOffset[3] + kZ] *
                                            oGrad[oIdx];
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

/**
 * @brief computes weight backpropagation for conv3D operation
 * @note INPUT & OUTPUT convention is the same as in the
 * forward function
 * weight grad
 *  for i in 0..weight_size:
 *      for n in 0..output_size:
 *          dL     dYn  dL
 *         ---- = ---- ----
 *          dwi    dwi  Yn
 * with : dYn / dwi = x_k
 * @tparam I input dtype
 * @tparam W weight dtype
 * @tparam O output dtype
 * @param[in] iDims input data dimensions
 * @param[in] iStrides nb element in each dimension of input tensor
 * @param[in] input input data
 * @param[in] oDims output data dimensions
 * @param[in] oStrides nb element in each dimension of output tensor
 * @param[in] oGrad gradients of output data
 * @param[in] kDims
 dimensions of kernel (not taking into account
 * In/OutChannels)
 * @param[in] kStrides nb element in each dimension of kernel tensor
 * (taking into account In/OutChannels)
 * @param[in] stride attribute of the convolution operator
 * @param[in] dilation attribute of the convolution operator
 * @param[inout] weightsGrad gradients of the kernel weights
 */
template <class I, class W, class O>
void conv3DBackwardWeights(const array<DimSize_t, 5> &iDims,
                           const array<DimSize_t, 4> &iStrides,
                           const I *input,
                           const array<DimSize_t, 5> &oDims,
                           const array<DimSize_t, 4> &oStrides,
                           const O *oGrad,
                           const array<DimSize_t, 3> &kDims,
                           const array<DimSize_t, 4> &kStrides,
                           const array<DimSize_t, 3> &stride,
                           const array<DimSize_t, 3> &dilation,
                           W *weightsGrad) {
    // records index offsets for each dimension that have a stride that is
    // not 1 (=> all dimension except the last) for every parsed tensor
    array<DimSize_t, 4> iOffsets{0, 0, 0, 0};
    array<DimSize_t, 4> oOffsets{0, 0, 0, 0};
    array<DimSize_t, 4> kOffsets{0, 0, 0, 0};
    array<DimSize_t, 3> iDilKernelOffsets{0, 0, 0};

    for (DimSize_t batch = 0; batch < iDims[0]; ++batch) {
        iOffsets[0] = batch * iStrides[0];
        oOffsets[0] = batch * oStrides[0];

        for (DimSize_t oChannel = 0; oChannel < oDims[1]; ++oChannel) {
            oOffsets[1] = oChannel * oStrides[1] + oOffsets[0];
            kOffsets[0] = oChannel * kStrides[0];

            for (DimSize_t iChannel = 0; iChannel < iDims[1]; ++iChannel) {
                iOffsets[1] = iChannel * iStrides[1] + iOffsets[0];
                kOffsets[1] = iChannel * kStrides[1] + kOffsets[0];

                // kernel loops are outermost so each weight gradient is
                // accumulated over the full output in the inner loops
                for (DimSize_t kX = 0; kX < kDims[0]; ++kX) {
                    kOffsets[2] = kX * kStrides[2] + kOffsets[1];
                    iDilKernelOffsets[0] = kX * dilation[0] * iStrides[2];

                    for (DimSize_t kY = 0; kY < kDims[1]; ++kY) {
                        kOffsets[3] = kY * kStrides[3] + kOffsets[2];
                        iDilKernelOffsets[1] = kY * dilation[1] * iStrides[3] +
                                               iDilKernelOffsets[0];

                        for (DimSize_t kZ = 0; kZ < kDims[2]; ++kZ) {
                            iDilKernelOffsets[2] =
                                kZ * dilation[2] + iDilKernelOffsets[1];

                            for (DimSize_t oX = 0; oX < oDims[2]; ++oX) {
                                oOffsets[2] = oX * oStrides[2] + oOffsets[1];
                                iOffsets[2] =
                                    oX * stride[0] * iStrides[2] + iOffsets[1];

                                for (DimSize_t oY = 0; oY < oDims[3]; ++oY) {
                                    oOffsets[3] =
                                        oY * oStrides[3] + oOffsets[2];
                                    iOffsets[3] =
                                        oY * stride[1] * iStrides[3] +
                                        iOffsets[2];

                                    // iZ tracks oZ * stride[2] without a
                                    // multiplication per iteration
                                    for (DimSize_t oZ = 0, iZ = 0;
                                         oZ < oDims[4];
                                         ++oZ) {

                                        weightsGrad[kOffsets[3] + kZ] +=
                                            input[iOffsets[3] + iZ +
                                                  iDilKernelOffsets[2]] *
                                            oGrad[oOffsets[3] + oZ];
                                        iZ += stride[2];
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

/**
 * @brief computes bias backpropagation for conv3D operation
 * @note INPUT & OUTPUT convention is the same as in the
 * forward function
 * @note formula :
 * Bias grad:
 *  for i in 0..bias_size:
 *      for n in 0..output_size:
 *          dL     dYn  dL
 *         ---- = ---- ----
 *          dbi    dbi  Yn
 * with : dYn / dbi = 1
 *
 * Hence the partial derivative of the loss wrt bias is the
 * output loss Hence the bias grad is just the sum of the
 * loss values over the batch
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam B Bias data type.
 * @tparam O Output data type.
+ * @param[in] oDims output tensor dimensions + * @param[in] oStrides nb of elements contained per dimension of the + * output + * @param[in] oGrad output tensor gradients + * @param[inout] biasesGrad biases gradients + */ +template <class B, class O> +static void conv3DBackwardBias(const array<DimSize_t, 5> &oDims, + const array<DimSize_t, 4> &oStrides, + const O *oGrad, + B *biasesGrad) { + // records all index offsets for output tensor + array<DimSize_t, 4> oOffsets{0, 0, 0, 0}; + for (DimSize_t batchIdx = 0; batchIdx < oDims[0]; ++batchIdx) { + oOffsets[0] = batchIdx * oStrides[0]; + + for (DimSize_t oChannel = 0; oChannel < oDims[1]; ++oChannel) { + oOffsets[1] = oChannel * oStrides[1] + oOffsets[0]; + + for (DimSize_t oX = 0; oX < oDims[2]; ++oX) { + oOffsets[2] = oX * oStrides[2] + oOffsets[1]; + + for (DimSize_t oY = 0; oY < oDims[3]; ++oY) { + oOffsets[3] = oY * oStrides[3] + oOffsets[2]; + for (DimSize_t oZ = 0; oZ < oDims[4]; ++oZ) { + biasesGrad[oChannel] += oGrad[oOffsets[3] + oZ]; + } + } + } + } + } +} + +/** + * @brief Backward kernel for 3D Convolution on CPU backend. + * @note INPUT & OUTPUT convention is the same as in the + * forward function + * + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param[in] const stride attribute of conv operator + * @param[in] const dilation attribute of conv operator + * @param[in] const kernelDims + * @param[in] const iDims input data dimensions + * @param[in] const oDims output data dimmensions + * @param[in] const input_ input tensor. + * @param[in] const weights_ kernel tensor. + * @param[in] const oGrad_ output tensor gradient. + * @param[inout] iGrad_ input tensor gradient. 
+ * @param[inout] weightsGrad_ kernel weights tensor gradients + * @param[inout] biasesGrad_ kernel biases tensor gradients + */ +template <class I, class W, class B, class O> +void ConvImpl3D_cpu_backward_kernel(const array<DimSize_t, 3> &stride, + const array<DimSize_t, 3> &dilation, + const array<DimSize_t, 3> &kernelDims, + const array<DimSize_t, 5> &inputDims, + const array<DimSize_t, 5> &outputDims, + const void *input_, + const void *weights_, + const void *oGrad_, + void *iGrad_, + void *weightsGrad_, + void *biasesGrad_) { + + const I *input = static_cast<const I *>(input_); + I *iGrad = static_cast<I *>(iGrad_); + const I *outputGrad = static_cast<const I *>(oGrad_); + const W *weights = static_cast<const W *>(weights_); + W *weightsGrad = static_cast<W *>(weightsGrad_); + + ////////////////////////////// + // COMPUTING STRIDES + ////////////////////////////// + // NOTE: The ...Stride var represent the number of values contained + // in each dimension they will be used to compute the index offset + // of values while iterating on each tensor NOTE: They are 1 item + // shorter than their corresponding tensor as the number of total + // elements is not used except for gradient initialization + + // {batch_stride, channel_stride, dim0_stride, dim1_stride} + const array<DimSize_t, 4> inputStrides{ + inputDims[1] * inputDims[2] * inputDims[3] * inputDims[4], + inputDims[2] * inputDims[3] * inputDims[4], + inputDims[3] * inputDims[4], + inputDims[4]}; + const DimSize_t nbEltsInput = inputDims[0] * inputStrides[0]; + + // {batch_stride, channel_stride, dim0_stride, dim1_stride} + const array<DimSize_t, 4> outputStrides{ + outputDims[1] * outputDims[2] * outputDims[3] * outputDims[4], + outputDims[2] * outputDims[3] * outputDims[4], + outputDims[3] * outputDims[4], + outputDims[4]}; + + // NOTE: kernel dims = {iChannel, oChannel, kernelDim0, kernelDim1} + // kernel_strides = {iChannel, oChannel, kernelDim0} + const array<DimSize_t, 4> kernelStrides{ + 
inputDims[1] * kernelDims[0] * kernelDims[1] * kernelDims[2], + kernelDims[0] * kernelDims[1] * kernelDims[2], + kernelDims[1] * kernelDims[2], + kernelDims[2]}; + + const DimSize_t nbEltsKernel = outputDims[1] * kernelStrides[0]; + + //////////////////////////// + // prepping gradient arrays + std::fill(iGrad, iGrad + nbEltsInput, I(0)); + std::fill(weightsGrad, weightsGrad + nbEltsKernel, W(0)); + + conv3DBackwardInput(stride, + dilation, + kernelDims, + kernelStrides, + weights, + outputDims, + outputStrides, + outputGrad, + inputDims, + inputStrides, + iGrad); + + conv3DBackwardWeights(inputDims, + inputStrides, + input, + outputDims, + outputStrides, + outputGrad, + kernelDims, + kernelStrides, + stride, + dilation, + weightsGrad); + + if (biasesGrad_ != nullptr) { + B *biasesGrad = static_cast<B *>(biasesGrad_); + std::fill(biasesGrad, biasesGrad + outputDims[1], B(0)); + conv3DBackwardBias(outputDims, outputStrides, outputGrad, biasesGrad); + } +} + +// Kernels registration to implementation entry point +REGISTRAR(ConvImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvImpl3D_cpu_forward_kernel<float, float, float, float>, + ConvImpl3D_cpu_backward_kernel<float, float, float, float>}); +REGISTRAR(ConvImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvImpl3D_cpu_forward_kernel<half_float::half, + half_float::half, + half_float::half, + half_float::half>, + ConvImpl3D_cpu_backward_kernel<half_float::half, + half_float::half, + half_float::half, + half_float::half>}); +REGISTRAR(ConvImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvImpl3D_cpu_forward_kernel<double, double, double, double>, + ConvImpl3D_cpu_backward_kernel<double, double, double, double>}); +REGISTRAR(ConvImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Int32, 
DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvImpl3D_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t, + std::int32_t>, + ConvImpl3D_cpu_backward_kernel<std::int32_t, + std::int32_t, + std::int32_t, + std::int32_t>}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvTransposeImpl.hpp b/include/aidge/backend/cpu/operator/ConvTransposeImpl.hpp index 7604a96a18e7be44f4c2e8970a0b60b1c4ad918b..d47636ef8112f2905583e92be4ccbd9710102bde 100644 --- a/include/aidge/backend/cpu/operator/ConvTransposeImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvTransposeImpl.hpp @@ -51,9 +51,24 @@ using ConvTransposeImpl2D_cpu = const void *, void *)>; +using ConvTranspose3D_Op = ConvTranspose_Op<3>; +using ConvTransposeImpl3D_cpu = + OperatorImpl_cpu<ConvTranspose3D_Op, + void(const array<DimSize_t, 3> &, + const array<DimSize_t, 3> &, + const array<DimSize_t, 3> &, + const array<DimSize_t, 5> &, + const array<DimSize_t, 5> &, + const void *, + const void *, + const void *, + void *)>; + // Implementation entry point registration to Operator REGISTRAR(ConvTranspose1D_Op, "cpu", ConvTransposeImpl1D_cpu::create); REGISTRAR(ConvTranspose2D_Op, "cpu", ConvTransposeImpl2D_cpu::create); +REGISTRAR(ConvTranspose3D_Op, "cpu", ConvTransposeImpl3D_cpu::create); + } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvTransposeImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvTransposeImpl_kernels.hpp index e11dd2625ae1645a8e7c5482b1635b85fb475b06..a734add2acb612d86ceaa1d09514da5a727c7ce4 100644 --- a/include/aidge/backend/cpu/operator/ConvTransposeImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvTransposeImpl_kernels.hpp @@ -300,6 +300,149 @@ REGISTRAR( ConvTransposeImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); +//////////////////////////////////////////////////////// 
+//////////////////////////////////////////////////////// +// 3D +//////////////////////////////////////////////////////// +//////////////////////////////////////////////////////// + +/** + * @brief performs forward bias operation for convtranspose operator + * + * @tparam B Bias data type. + * @tparam O Output data type. + * @param[in] bias bias values + * @param[in] oDims dimensions of the output + * @param[in] oStrides nb of elements contained per dimension of the output + * @param[out] output + */ +template <class B, class O> +static void convTranspose3DForwardBias(const B *biases, + const array<DimSize_t, 5> &oDims, + const array<DimSize_t, 4> &oStrides, + O *output) { + array<DimSize_t, 2> outOffsets{0, 0}; + + for (DimSize_t batch = 0; batch < oDims[0]; ++batch) { + outOffsets[0] = batch * oStrides[0]; + + for (DimSize_t outCh = 0; outCh < oDims[1]; ++outCh) { + outOffsets[1] = outCh * oStrides[1] + outOffsets[0]; + // If bias = nullptr, set B(0) + B biasVal = (biases != nullptr) ? biases[outCh] : B(0); + std::fill(output + outOffsets[1], + (output + outOffsets[1]) + oStrides[1], + biasVal); + } + } +} + +/** + * @brief forward kernel for convtranspose + * @note ConvTranspose forward is simply convolution backward kernel. + * Check convolution functions for more in-depth details on how the + subfunctions are built. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. 
+ * @param[in] stride stride parameter of the convTranspose operator + * @param[in] dilation dilation parameter of the convTranspose operator + * @param[in] inputDims input dimensions + * @param[in] outputDims output tensor dimensions + * @param[in] oStrides nb of elements contained per dimension of the output + * @param[in] input_ values + * @param[in] weight_ values + * @param[in] biases_ values + * @param[out] output + */ +template <class I, class W, class B, class O> +void ConvTransposeImpl3D_cpu_forward_kernel( + const array<DimSize_t, 3> &stride, + const array<DimSize_t, 3> &dilation, + const array<DimSize_t, 3> &kernelDims, + const array<DimSize_t, 5> &inputDims, + const array<DimSize_t, 5> &outputDims, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) { + + auto input = static_cast<const I *>(input_); + auto weights = static_cast<const W *>(weights_); + auto output = static_cast<O *>(output_); + + // {channel_stride, dim0_stride, dim1_stride} + const array<DimSize_t, 4> inputStrides{ + inputDims[1] * inputDims[2] * inputDims[3] * inputDims[4], + inputDims[2] * inputDims[3] * inputDims[4], + inputDims[3] * inputDims[4], + inputDims[4]}; + + // {channel_stride, dim0_stride, dim1_stride} + const array<DimSize_t, 4> outputStrides{ + outputDims[1] * outputDims[2] * outputDims[3] * outputDims[4], + outputDims[2] * outputDims[3] * outputDims[4], + outputDims[3] * outputDims[4], + outputDims[4]}; + + // NOTE: kernel dims = {inChannels, outChannels, kernelDims[0], + // kernelDims[1]} + const array<DimSize_t, 4> kernelStrides{ + outputDims[1] * kernelDims[0] * kernelDims[1] * kernelDims[2], + kernelDims[0] * kernelDims[1] * kernelDims[2], + kernelDims[1] * kernelDims[2], + kernelDims[2]}; + + if (biases_ != nullptr) { + auto biases = static_cast<const B *>(biases_); + convTranspose3DForwardBias(biases, outputDims, outputStrides, output); + } + + conv3DBackwardInput(stride, + dilation, + kernelDims, + kernelStrides, + weights, + 
inputDims, + inputStrides, + input, + outputDims, + outputStrides, + output); +} + +REGISTRAR(ConvTransposeImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvTransposeImpl3D_cpu_forward_kernel<std::int32_t, + std::int32_t, + std::int32_t, + std::int32_t>, + nullptr}); +REGISTRAR(ConvTransposeImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvTransposeImpl3D_cpu_forward_kernel<half_float::half, + half_float::half, + half_float::half, + half_float::half>, + nullptr}); +REGISTRAR(ConvTransposeImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvTransposeImpl3D_cpu_forward_kernel<float, float, float, float>, + nullptr}); +REGISTRAR( + ConvTransposeImpl3D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, + ConvTransposeImpl3D_cpu_forward_kernel<double, double, double, double>, + nullptr}); + } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVTRANSPOSEIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp index bc0bd8cad3b630b89f728d78b59652f31bbcf410..a36ddab71ea41a64483384c9427a1876f891de7c 100644 --- a/include/aidge/backend/cpu/operator/PadImpl.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl.hpp @@ -54,9 +54,19 @@ using PadImpl2D_cpu = OperatorImpl_cpu<Pad_Op<2>, const void *, void *)>; +using Pad3D_Op = Pad_Op<3>; +using PadImpl3D_cpu = OperatorImpl_cpu<Pad_Op<3>, + void(const std::array<DimSize_t, 6>&, + const PadBorderType, + const double, + const std::array<DimSize_t, 5> &, + const void *, + void *)>; + // Implementation entry point registration to Operator REGISTRAR(Pad1D_Op, "cpu", Aidge::PadImpl1D_cpu::create); REGISTRAR(Pad2D_Op, "cpu", Aidge::PadImpl2D_cpu::create); +REGISTRAR(Pad3D_Op, "cpu", 
Aidge::PadImpl3D_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp index 6d218cb1d719e8576f6c013ac5a1b9c60a739852..0c1936505632f0973c08922b2645224a86085c02 100644 --- a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp @@ -12,16 +12,27 @@ #ifndef AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ #define AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ -#include <algorithm> // std::max, std::min +#include <aidge/operator/Pad.hpp> +#include <aidge/utils/ErrorHandling.hpp> +#include <algorithm> // std::max, std::min #include <array> -#include <cstddef> // std::size_t -#include <cstdint> // std::int32_t +#include <cmath> +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t +#include <fmt/base.h> +#include <stdexcept> +#include <type_traits> #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { + +//////////////////////////////////////////////////////////////////////////////////////////////// +// PAD 1D +//////////////////////////////////////////////////////////////////////////////////////////////// + /** * @brief Forward kernel for 1D Padding on CPU backend. * @tparam I Input data type. 
@@ -187,6 +198,368 @@ REGISTRAR(PadImpl2D_cpu, REGISTRAR(PadImpl2D_cpu, {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr}); -} // namespace Aidge + +//////////////////////////////////////////////////////////////////////////////////////////////// +// PAD 3D +//////////////////////////////////////////////////////////////////////////////////////////////// + +template <typename I, typename O> +static inline void +pad3DForwardConstant(const std::array<DimSize_t, 6> &beginEndBorders, + const O borderValue, + const std::array<DimSize_t, 5> &iDims, + const std::array<DimSize_t, 4> &iStrides, + std::array<DimSize_t, 4> &iOffsets, + const I *input, + const std::array<DimSize_t, 3> &oDims, + const std::array<DimSize_t, 4> &oStrides, + std::array<DimSize_t, 4> &oOffsets, + O *output) { + + for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { + oOffsets[2] = oX * oStrides[2] + oOffsets[1]; + + const SignedDimSize_t iX = oX - beginEndBorders[0]; + if (iX >= 0 && iX < static_cast<SignedDimSize_t>(iDims[2])) { + iOffsets[2] = iX * iStrides[2] + iOffsets[1]; + } else { + std::fill(output + oOffsets[2], + output + oOffsets[2] + oStrides[2], + borderValue); + continue; + } + + for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { + oOffsets[3] = oY * oStrides[3] + oOffsets[2]; + const SignedDimSize_t iY = oY - beginEndBorders[1]; + if (iY >= 0 && iY < static_cast<SignedDimSize_t>(iDims[3])) { + iOffsets[3] = iY * iStrides[3] + iOffsets[2]; + } else { + std::fill(output + oOffsets[3], + output + oOffsets[3] + oStrides[3], + borderValue); + continue; + } + + for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { + const SignedDimSize_t iZ = oZ - beginEndBorders[2]; + // if in bounds, takes corresponding value, otherwise takes + // default value + output[oOffsets[3] + oZ] = + (iZ >= 0 && iZ < static_cast<SignedDimSize_t>(iDims[4])) + ? 
input[iOffsets[3] + iZ] + : borderValue; + } + } + } +} + +/** + * @brief small inline fctn to generate the corresponding input coordinates of + * an output coord in edge padding along a given dimension. + * @param[in] beginBorder Padding at the beginning of given dimension. + * @param[in] iDim Size of given dimension + * @param[in] oCoord output coord along given dimension + */ +static inline DimSize_t padEdgeComputeInputCoord(const DimSize_t beginBorder, + const DimSize_t iDim, + const DimSize_t oCoord) { + return static_cast<DimSize_t>(std::max( + static_cast<SignedDimSize_t>(0), + std::min(static_cast<SignedDimSize_t>(iDim - 1), + static_cast<SignedDimSize_t>(oCoord - beginBorder)))); +} + +template <typename I, typename O> +static inline void +pad3DForwardEdge(const std::array<DimSize_t, 6> &beginEndBorders, + const std::array<DimSize_t, 5> &iDims, + const std::array<DimSize_t, 4> &iStrides, + std::array<DimSize_t, 4> &iOffsets, + const I *input, + const std::array<DimSize_t, 3> &oDims, + const std::array<DimSize_t, 4> &oStrides, + std::array<DimSize_t, 4> &oOffsets, + O *output) { + for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { + oOffsets[2] = oX * oStrides[2] + oOffsets[1]; + const DimSize_t iX = + padEdgeComputeInputCoord(beginEndBorders[0], iDims[2], oX); + iOffsets[2] = iX * iStrides[2] + iOffsets[1]; + + for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { + oOffsets[3] = oY * oStrides[3] + oOffsets[2]; + const DimSize_t iY = + padEdgeComputeInputCoord(beginEndBorders[1], iDims[3], oY); + iOffsets[3] = iY * iStrides[3] + iOffsets[2]; + + for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { + const DimSize_t iZ = + padEdgeComputeInputCoord(beginEndBorders[2], iDims[4], oZ); + + output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ]; + } + } + } +} + +/** + * @brief small inline fctn to generate the corresponding input coordinates of + * an output coord in reflect padding along a given dimension. + * @param[in] beginBorder Padding at the beginning of given dimension. 
+ * @param[in] iDim Size of given dimension + * @param[in] oCoord output coord along given dimension + */ +static inline DimSize_t +padReflectComputeInputCoord(const DimSize_t beginBorder, + const DimSize_t iDim, + const DimSize_t oCoord) { + SignedDimSize_t iCoord = + std::abs(static_cast<SignedDimSize_t>(oCoord - beginBorder)); + + // Handle case where iCoord > iDim + // If so iCoord must be changed to (iDim - 1) - delta + // With delta = |iDim - 1 - icoord| + // + // Since iCoord > iDim - 1, |(iDim - 1) - iCoord| <=> iCoord - (iDim - 1) + // <=> iCoord + 1 - iDim + // Hence iDim - 1 - delta <=> iDim - 1 - (iCoord + 1 - iDim) + // <=> 2 * (iDim - 1) - iCoord + iCoord = (iCoord >= static_cast<SignedDimSize_t>(iDim)) + ? static_cast<SignedDimSize_t>(iDim + iDim - 2) - iCoord + : iCoord; + return iCoord; +} + +template <typename I, typename O> +static inline void +pad3DForwardReflect(const std::array<DimSize_t, 6> &beginEndBorders, + const std::array<DimSize_t, 5> &iDims, + const std::array<DimSize_t, 4> &iStrides, + std::array<DimSize_t, 4> &iOffsets, + const I *input, + const std::array<DimSize_t, 3> &oDims, + const std::array<DimSize_t, 4> &oStrides, + std::array<DimSize_t, 4> &oOffsets, + O *output) { + + for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { + oOffsets[2] = oX * oStrides[2] + oOffsets[1]; + DimSize_t iX = + padReflectComputeInputCoord(beginEndBorders[0], iDims[2], oX); + iOffsets[2] = iX * iStrides[2] + iOffsets[1]; + + for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { + oOffsets[3] = oY * oStrides[3] + oOffsets[2]; + DimSize_t iY = + padReflectComputeInputCoord(beginEndBorders[1], iDims[3], oY); + iOffsets[3] = iY * iStrides[3] + iOffsets[2]; + + for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { + DimSize_t iZ = padReflectComputeInputCoord(beginEndBorders[2], + iDims[4], + oZ); + output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ]; + } + } + } +} + +/** + * @brief small inline fctn to generate the corresponding input coordinates of + * an output coord in wrap 
padding along a given dimension. + * @param[in] beginBorder Padding at the beginning of given dimension. + * @param[in] iDim Size of given dimension + * @param[in] oCoord output coord along given dimension + */ +static inline DimSize_t padWrapComputeInputCoord(const DimSize_t beginBorder, + const DimSize_t iDim, + const DimSize_t oCoord) { + return (iDim + oCoord - beginBorder) % iDim; +} + +template <typename I, typename O> +static inline void +pad3DForwardWrap(const std::array<DimSize_t, 6> &beginEndBorders, + const std::array<DimSize_t, 5> &iDims, + const std::array<DimSize_t, 4> &iStrides, + std::array<DimSize_t, 4> &iOffsets, + const I *input, + const std::array<DimSize_t, 3> &oDims, + const std::array<DimSize_t, 4> &oStrides, + std::array<DimSize_t, 4> &oOffsets, + O *output) { + for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { + oOffsets[2] = oX * oStrides[2] + oOffsets[1]; + + const DimSize_t iX = + padWrapComputeInputCoord(beginEndBorders[0], iDims[2], oX); + iOffsets[2] = iX * iStrides[2] + iOffsets[1]; + + for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { + oOffsets[3] = oY * oStrides[3] + oOffsets[2]; + + const DimSize_t iY = + padWrapComputeInputCoord(beginEndBorders[1], iDims[3], oY); + + iOffsets[3] = iY * iStrides[3] + iOffsets[2]; + + for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { + const DimSize_t iZ = + padWrapComputeInputCoord(beginEndBorders[2], iDims[4], oZ); + output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ]; + } + } + } +} + +/** + * @brief Forward kernel for 3D Padding on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param attrs tuple of Parameters from the Operator + * @param iDims Array of input dimensions. + * @param input_ const input Tensor. + * @param output_ Output Tensor. 
+ */ +template <class I, class O> +void PadImpl3D_cpu_forward_kernel( + const std::array<DimSize_t, 6> &beginEndBorders, + const PadBorderType borderType, + const double borderValue, + const std::array<DimSize_t, 5> &iDims, + const void *input_, + void *output_) { + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + // not taking in count batch & channel as they are identical to iDims + const std::array<DimSize_t, 3> oDims = { + iDims[2] + beginEndBorders[0] + beginEndBorders[3], + iDims[3] + beginEndBorders[1] + beginEndBorders[4], + iDims[4] + beginEndBorders[2] + beginEndBorders[5]}; + + const std::array<DimSize_t, 4> oStrides = { + iDims[1] * oDims[0] * oDims[1] * oDims[2], + oDims[0] * oDims[1] * oDims[2], + oDims[1] * oDims[2], + oDims[2], + }; + const std::array<DimSize_t, 4> iStrides = { + iDims[1] * iDims[2] * iDims[3] * iDims[4], + iDims[2] * iDims[3] * iDims[4], + iDims[3] * iDims[4], + iDims[4], + }; + + std::array<DimSize_t, 4> oOffsets = {0, 0, 0, 0}; + std::array<DimSize_t, 4> iOffsets = {0, 0, 0, 0}; + + for (std::size_t batch = 0; batch < iDims[0]; ++batch) { + oOffsets[0] = batch * oStrides[0]; + iOffsets[0] = batch * iStrides[0]; + + for (std::size_t ch = 0; ch < iDims[1]; ++ch) { + iOffsets[1] = ch * iStrides[1] + iOffsets[0]; + oOffsets[1] = ch * oStrides[1] + oOffsets[0]; + + switch (borderType) { + case PadBorderType::Constant: { + pad3DForwardConstant(beginEndBorders, + static_cast<O>(borderValue), + iDims, + iStrides, + iOffsets, + input, + oDims, + oStrides, + oOffsets, + output); + break; + } + case PadBorderType::Zero: { + pad3DForwardConstant(beginEndBorders, + static_cast<O>(0), + iDims, + iStrides, + iOffsets, + input, + oDims, + oStrides, + oOffsets, + output); + break; + } + case PadBorderType::Edge: { + pad3DForwardEdge(beginEndBorders, + iDims, + iStrides, + iOffsets, + input, + oDims, + oStrides, + oOffsets, + output); + break; + } + case PadBorderType::Reflect: { + 
pad3DForwardReflect(beginEndBorders, + iDims, + iStrides, + iOffsets, + input, + oDims, + oStrides, + oOffsets, + output); + break; + } + case PadBorderType::Wrap: { + pad3DForwardWrap(beginEndBorders, + iDims, + iStrides, + iOffsets, + input, + oDims, + oStrides, + oOffsets, + output); + break; + } + default: { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Pad3D : unsupported padding method : {}.", + borderType); + } + } + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(PadImpl3D_cpu, + {{DataType::Float32, DataFormat::NCHW}, + {DataType::Float32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl3D_cpu_forward_kernel<cpptype_t<DataType::Float32>, + cpptype_t<DataType::Float32>>, + nullptr}); + +REGISTRAR(PadImpl3D_cpu, + {{DataType::Float64, DataFormat::NCHW}, + {DataType::Float64, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl3D_cpu_forward_kernel<cpptype_t<DataType::Float64>, + cpptype_t<DataType::Float64>>, + nullptr}); +REGISTRAR(PadImpl3D_cpu, + {{DataType::Int32, DataFormat::NCHW}, + {DataType::Int32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, + Aidge::PadImpl3D_cpu_forward_kernel<cpptype_t<DataType::Int32>, + cpptype_t<DataType::Int32>>, + nullptr}); +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ */ diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index eae5f109f6af8298b90cc8e505ff44eff51bab5c..22f28d504be2a071b5b9e06abbf8106cc836c32d 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -26,7 +26,6 @@ template <> void ConvImpl1D_cpu::forward() { const auto& op_ = static_cast<const Conv_Op<1>&>(mOp); - // FIXME: uncomment the following code once memory handling will work AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); @@ -104,7 +103,6 @@ template <> void ConvImpl2D_cpu::forward() { const auto& op_ = 
dynamic_cast<const Conv_Op<2>&>(mOp); - // FIXME: uncomment the following code once memory handling will work AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); @@ -178,4 +176,79 @@ void ConvImpl2D_cpu::backward() { op.getInput(2) ? inputBiasGrad.getImpl()->rawPtr() : nullptr); } +template <> +void Aidge::ConvImpl3D_cpu::forward() { + const auto& op_ = dynamic_cast<const Conv_Op<3>&>(mOp); + + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); + AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); + + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; + const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); + const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); + const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); + + // Find the correct kernel type + const auto impl = Registrar<ConvImpl3D_cpu>::create(getBestMatch(getRequiredSpec())); + // Call kernel + impl.forward(op_.strideDims(), + op_.dilationDims(), + op_.kernelDims(), + op_.getInput(0)->template dims<5>(), // input dimensions + op_.getOutput(0)->template dims<5>(), // output dimensions + input0.getImpl()->rawPtr(), // input + input1.getImpl()->rawPtr(), // weight + op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); +} + +template <> void ConvImpl3D_cpu::backward() { + const auto &op = dynamic_cast<const Conv3D_Op &>(mOp); + const auto &outputGrad = op.getOutput(0)->grad(); + AIDGE_ASSERT(outputGrad, "{}: missing output #0 gradient", op.type()); + AIDGE_ASSERT(op.getInput(0)->grad(), + "{}: missing data input(#0) gradient", + op.type()); + AIDGE_ASSERT(op.getInput(1)->grad(), + "{}: missing weight input(#1) gradient", + op.type()); + + std::shared_ptr<Tensor> inputDataGradFallback, inputWeightGradFallback, + inputBiasGradFallback; + const auto &inputDataGrad = + op.getInput(0)->grad()->refCastFrom(inputDataGradFallback, + *(op.getOutput(0))); + const auto &inputWeightGrad = + op.getInput(1)->grad()->refCastFrom(inputWeightGradFallback, + *(op.getOutput(0))); + const auto &inputBiasGrad = + (op.getInput(2) && op.getInput(2)->grad()) + ? op.getInput(2)->grad()->refCastFrom(inputBiasGradFallback, + *(op.getOutput(0))) + : Tensor(); + + // Call kernel + const auto impl = + Registrar<ConvImpl3D_cpu>::create(getBestMatch(getRequiredSpec())); + impl.backward( + op.strideDims(), + op.dilationDims(), + op.kernelDims(), + op.getInput(0)->template dims<5>(), + op.getOutput(0)->template dims<5>(), + + getCPUPtr(op.getInput(0)), + getCPUPtr(op.getInput(1)), + getCPUPtr(outputGrad), + inputDataGrad.getImpl()->rawPtr(), + inputWeightGrad.getImpl()->rawPtr(), + op.getInput(2) ? 
inputBiasGrad.getImpl()->rawPtr() : nullptr); +} + } // namespace Aidge diff --git a/src/operator/ConvTransposeImpl.cpp b/src/operator/ConvTransposeImpl.cpp index d1135cc92dd3c68746b9dcf80739f4f65acdad2e..4f6a8f62be6cc14303419ce6cfa89b3065b01569 100644 --- a/src/operator/ConvTransposeImpl.cpp +++ b/src/operator/ConvTransposeImpl.cpp @@ -89,3 +89,42 @@ template <> void Aidge::ConvTransposeImpl2D_cpu::backward() { "Backward not yet implemented for Conv_Op<2> on backend cpu"); } +template <> void Aidge::ConvTransposeImpl3D_cpu::forward() { + const auto &op = static_cast<const ConvTranspose_Op<3> &>(mOp); + + AIDGE_ASSERT(op.getInput(0), "{}: missing data input (#0).", op.type()); + AIDGE_ASSERT(op.getInput(1), "{}: missing weight input (#1).", op.type()); + AIDGE_ASSERT(op.getInput(2), "{}: missing bias input (#2).", op.type()); + + std::shared_ptr<Tensor> inputDataFallback, inputWeightFallback, + inputBiasFallback; + const auto &inputData = + op.getInput(0)->refCastFrom(inputDataFallback, *op.getOutput(0)); + const auto &inputWeight = + op.getInput(1)->refCastFrom(inputWeightFallback, *op.getOutput(0)); + const auto &inputBias = + (op.getInput(2)) + ? op.getInput(2)->refCastFrom(inputBiasFallback, *op.getOutput(0)) + : Tensor(); + + // Call kernel + const auto impl = Registrar<ConvTransposeImpl3D_cpu>::create( + getBestMatch(getRequiredSpec())); + + impl.forward(op.strideDims(), + op.dilationDims(), + op.kernelDims(), + op.getInput(0)->template dims<5>(), + op.getOutput(0)->template dims<5>(), + inputData.getImpl()->hostPtr(), + inputWeight.getImpl()->hostPtr(), + op.getInput(2) ? 
inputBias.getImpl()->hostPtr() : nullptr, + op.getOutput(0)->getImpl()->rawPtr()); +} + +template <> void Aidge::ConvTransposeImpl3D_cpu::backward() { + AIDGE_THROW_OR_ABORT( + std::runtime_error, + "Backward not yet implemented for ConvTranspose_Op<3> on backend cpu"); +} + diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp index 9a54437f445a1842b2f97555a0cbea8988acf50a..30f53def1a7e2f48fa0118a1fba7681b2b8dbcbc 100644 --- a/src/operator/PadImpl.cpp +++ b/src/operator/PadImpl.cpp @@ -74,3 +74,26 @@ template <> void Aidge::PadImpl2D_cpu::backward() { AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<2> on backend cpu"); } + + +template <> +void Aidge::PadImpl3D_cpu::forward() { + const auto& op_ = dynamic_cast<const Pad_Op<3>&>(mOp); + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator."); + + // Find the correct kernel type + const auto impl = Registrar<PadImpl3D_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(op_.beginEndBorders(), + op_.borderType(), + op_.borderValue(), + op_.getInput(0)->template dims<5>(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} + +template <> +void Aidge::PadImpl3D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<3> on backend cpu"); +} diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp index c7242bbb6f0c7ba6632d1d5937b72e2a0d5cc218..6a49a7995e4827294466e10b1347e81a79478f6a 100644 --- a/unit_tests/operator/Test_ConvImpl.cpp +++ b/unit_tests/operator/Test_ConvImpl.cpp @@ -9,21 +9,60 @@ * ********************************************************************************/ +#include <aidge/utils/Types.h> #include <memory> #include <catch2/catch_test_macros.hpp> #include <fmt/core.h> #include "aidge/backend/cpu/operator/ConvImpl.hpp" -#include "aidge/data/Data.hpp" // DataType +#include "aidge/data/Data.hpp" // DataType #include 
"aidge/data/Tensor.hpp" #include "aidge/filler/Filler.hpp" #include "aidge/graph/Node.hpp" #include "aidge/operator/Conv.hpp" -#include "aidge/utils/TensorUtils.hpp" #include "aidge/operator/Pad.hpp" +#include "aidge/utils/TensorUtils.hpp" + +namespace Aidge { + +template <DimSize_t DIM> +static std::shared_ptr<OperatorTensor> +setupTestConv(const DimSize_t batchSize, + const DimSize_t inChannels, + const DimSize_t outChannels, + const std::array<DimSize_t, DIM> kernelSize, + const std::array<DimSize_t, DIM> dataSize, + const std::array<DimSize_t, DIM> stride, + const std::array<DimSize_t, DIM> dilation, + const std::array<DimSize_t, 2 * DIM> padding, + const std::shared_ptr<Tensor> input, + const std::shared_ptr<Tensor> weights, + const std::shared_ptr<Tensor> biases) { + input->setBackend("cpu"); + weights->setBackend("cpu"); + biases->setBackend("cpu"); + std::shared_ptr<Node> convNode; + convNode = Conv(inChannels, + outChannels, + kernelSize, + "myconv", + std::array<DimSize_t, DIM>({stride}), + dilation); + auto op = + std::static_pointer_cast<OperatorTensor>(convNode->getOperator()); + + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->associateInput(0, input); + op->associateInput(1, weights); + op->associateInput(2, biases); + + REQUIRE_NOTHROW(op->forwardDims(true)); -using namespace Aidge; + return op; +} /** * @brief ConvDepthWise reference cpp backend forward implmentation tests. 
@@ -44,6 +83,7 @@ using namespace Aidge; * stride [2,2], dilation [2,2] */ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { + SECTION("2D") { SECTION("Conv with kernel [3,3]") { SECTION("No stride, no dilation") { std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv"); @@ -1714,47 +1754,216 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { //fmt::print("{:.^20}\n", "truth"); //(*expectedOutput).print(); REQUIRE(*(conv_op.getOutput(0)) == *expectedOutput); + } } } + SECTION("3D") { + constexpr DimSize_t DIM = 3; + SECTION("minimal test, no stride, no dilation, 1 in/outChannel") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 1; + constexpr DimSize_t outChannels = 1; + constexpr std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {3, 3, 3}; -} + constexpr std::array<DimSize_t, DIM> stride = {1, 1, 1}; + constexpr std::array<DimSize_t, DIM> dilation = {1, 1, 1}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0, 0}); -template <DimSize_t DIM> -std::shared_ptr<OperatorTensor> -setupTestConv(const DimSize_t batchSize, - const DimSize_t inChannels, - const DimSize_t outChannels, - const std::array<DimSize_t, DIM> kernelSize, - const std::array<DimSize_t, DIM> dataSize, - const std::array<DimSize_t, DIM> stride, - const std::array<DimSize_t, DIM> dilation, - const std::array<DimSize_t, 2 * DIM> padding, - const std::shared_ptr<Tensor> input, - const std::shared_ptr<Tensor> weights, - const std::shared_ptr<Tensor> biases) { - input->setBackend("cpu"); - weights->setBackend("cpu"); - biases->setBackend("cpu"); - std::shared_ptr<Node> convNode; - convNode = Conv(inChannels, - outChannels, - kernelSize, - "myconv", - std::array<DimSize_t, DIM>({stride}), - dilation); - auto op = - std::static_pointer_cast<OperatorTensor>(convNode->getOperator()); + constexpr std::array<DimSize_t, DIM> outDataSize = {2, 2, 2}; - op->setDataType(DataType::Float32); - 
op->setBackend("cpu"); + auto inputSize = std::vector<DimSize_t>( + {batchSize, inChannels, inDataSize[0], inDataSize[1]}); - op->associateInput(0, input); - op->associateInput(1, weights); - op->associateInput(2, biases); + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1., 2., 3.}, {4., 5., 6.}, {7., 8., 9.}}, - REQUIRE_NOTHROW(op->forwardDims(true)); + {{10., 11., 12.}, {13., 14., 15.}, {16., 17., 18.}}, - return op; + {{19., 20., 21.}, {22., 23., 24.}, {25., 26., 27.}}}}}})); + auto weights = std::make_shared<Tensor>( + Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>({{{{{{0.1, 0.2}, {0.3, 0.4}}, + + {{0.5, 0.6}, {0.7, 0.8}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + REQUIRE_NOTHROW(op->forward()); + + auto expectedOutput = Tensor(Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>( + {{{{{{35.610001, 39.209999}, {46.410000, 50.010002}}, + + {{68.010002, 71.610001}, {78.809998, 82.410004}}}}}})); + + CHECK(approxEq<float, float>(*op->getOutput(0), expectedOutput)); + } + SECTION("stride & dilation, multiple outChannels") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 1; + constexpr DimSize_t outChannels = 2; + constexpr std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {8, 8, 8}; + + constexpr std::array<DimSize_t, DIM> stride = {2, 3, 4}; + constexpr std::array<DimSize_t, DIM> dilation = {4, 3, 2}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {2, 2, 2}; + + auto inputSize = std::vector<DimSize_t>( + {batchSize, 
inChannels, inDataSize[0], inDataSize[1]}); + + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1., 2., 3., 4., 5., 6., 7., 8.}, + {9., 10., 11., 12., 13., 14., 15., 16.}, + {17., 18., 19., 20., 21., 22., 23., 24.}, + {25., 26., 27., 28., 29., 30., 31., 32.}, + {33., 34., 35., 36., 37., 38., 39., 40.}, + {41., 42., 43., 44., 45., 46., 47., 48.}, + {49., 50., 51., 52., 53., 54., 55., 56.}, + {57., 58., 59., 60., 61., 62., 63., 64.}}, + + {{65., 66., 67., 68., 69., 70., 71., 72.}, + {73., 74., 75., 76., 77., 78., 79., 80.}, + {81., 82., 83., 84., 85., 86., 87., 88.}, + {89., 90., 91., 92., 93., 94., 95., 96.}, + {97., 98., 99., 100., 101., 102., 103., 104.}, + {105., 106., 107., 108., 109., 110., 111., 112.}, + {113., 114., 115., 116., 117., 118., 119., 120.}, + {121., 122., 123., 124., 125., 126., 127., 128.}}, + + {{129., 130., 131., 132., 133., 134., 135., 136.}, + {137., 138., 139., 140., 141., 142., 143., 144.}, + {145., 146., 147., 148., 149., 150., 151., 152.}, + {153., 154., 155., 156., 157., 158., 159., 160.}, + {161., 162., 163., 164., 165., 166., 167., 168.}, + {169., 170., 171., 172., 173., 174., 175., 176.}, + {177., 178., 179., 180., 181., 182., 183., 184.}, + {185., 186., 187., 188., 189., 190., 191., 192.}}, + + {{193., 194., 195., 196., 197., 198., 199., 200.}, + {201., 202., 203., 204., 205., 206., 207., 208.}, + {209., 210., 211., 212., 213., 214., 215., 216.}, + {217., 218., 219., 220., 221., 222., 223., 224.}, + {225., 226., 227., 228., 229., 230., 231., 232.}, + {233., 234., 235., 236., 237., 238., 239., 240.}, + {241., 242., 243., 244., 245., 246., 247., 248.}, + {249., 250., 251., 252., 253., 254., 255., 256.}}, + + {{257., 258., 259., 260., 261., 262., 263., 264.}, + {265., 266., 267., 268., 269., 270., 271., 272.}, + {273., 274., 275., 276., 277., 278., 279., 280.}, + {281., 282., 283., 284., 285., 286., 287., 288.}, + {289., 290., 291., 292., 293., 
294., 295., 296.}, + {297., 298., 299., 300., 301., 302., 303., 304.}, + {305., 306., 307., 308., 309., 310., 311., 312.}, + {313., 314., 315., 316., 317., 318., 319., 320.}}, + + {{321., 322., 323., 324., 325., 326., 327., 328.}, + {329., 330., 331., 332., 333., 334., 335., 336.}, + {337., 338., 339., 340., 341., 342., 343., 344.}, + {345., 346., 347., 348., 349., 350., 351., 352.}, + {353., 354., 355., 356., 357., 358., 359., 360.}, + {361., 362., 363., 364., 365., 366., 367., 368.}, + {369., 370., 371., 372., 373., 374., 375., 376.}, + {377., 378., 379., 380., 381., 382., 383., 384.}}, + + {{385., 386., 387., 388., 389., 390., 391., 392.}, + {393., 394., 395., 396., 397., 398., 399., 400.}, + {401., 402., 403., 404., 405., 406., 407., 408.}, + {409., 410., 411., 412., 413., 414., 415., 416.}, + {417., 418., 419., 420., 421., 422., 423., 424.}, + {425., 426., 427., 428., 429., 430., 431., 432.}, + {433., 434., 435., 436., 437., 438., 439., 440.}, + {441., 442., 443., 444., 445., 446., 447., 448.}}, + + {{449., 450., 451., 452., 453., 454., 455., 456.}, + {457., 458., 459., 460., 461., 462., 463., 464.}, + {465., 466., 467., 468., 469., 470., 471., 472.}, + {473., 474., 475., 476., 477., 478., 479., 480.}, + {481., 482., 483., 484., 485., 486., 487., 488.}, + {489., 490., 491., 492., 493., 494., 495., 496.}, + {497., 498., 499., 500., 501., 502., 503., 504.}, + {505., 506., 507., 508., 509., 510., 511., 512.}}}}}})); + + auto weights = std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{0.1, 0.2}, {0.3, 0.4}}, {{0.5, 0.6}, {0.7, 0.8}}}}, + + {{{{0.9, 1.0}, {1.1, 1.2}}, {{1.3, 1.4}, {1.5, 1.6}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01, 0.02}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + REQUIRE_NOTHROW(op->forward()); 
+ + auto expectedOutput = Tensor(Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>( + {{{{{{726.010010, 740.410034}, {812.409973, 826.809998}}, + + {{1186.810059, 1201.210083}, {1273.210083, 1287.609985}}}, + + {{{1634.820068, 1674.820068}, {1874.820068, 1914.819946}}, + + {{2914.820312, 2954.820068}, + {3154.820068, 3194.819824}}}}}})); + + CHECK(approxEq<float, float>(*op->getOutput(0), expectedOutput)); + } + } } TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { @@ -1777,39 +1986,17 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { auto input = std::make_shared<Tensor>( Array3D<float, batchSize, inChannels, inDataSize>( - {{{{1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000}, - {1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000, - 1.000000}}}})); + {{{{1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}, + {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}}}})); auto weights = std::make_shared<Tensor>( Array3D<float, outChannels, inChannels, kernelSize>( - {{{{0.100000, 0.100000, 0.100000, 0.100000}, - {0.100000, 0.100000, 0.100000, 0.100000}}, - {{0.100000, 0.100000, 0.100000, 0.100000}, - {0.100000, 0.100000, 0.100000, 0.100000}}, - {{0.100000, 0.100000, 0.100000, 0.100000}, - {0.100000, 0.100000, 0.100000, 0.100000}}} + {{{{0.1, 0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1, 0.1}}, + {{0.1, 0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1, 0.1}}, + {{0.1, 0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1, 0.1}}} })); @@ -1845,30 +2032,8 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { SECTION("Input Grad") { auto expectedInputGrad = std::make_shared<Tensor>( Array3D<float, batchSize, inChannels, inDataSize>( - {{{{0.3000, - 0.6000, - 0.9000, - 1.2000, - 1.2000, - 1.2000, - 1.2000, - 1.2000, - 1.2000, - 0.9000, - 0.6000, - 0.3000}, - {0.3000, - 0.6000, - 
0.9000, - 1.2000, - 1.2000, - 1.2000, - 1.2000, - 1.2000, - 1.2000, - 0.9000, - 0.6000, - 0.3000}}}})); + {{{{0.3, 0.6, 0.9, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 0.9, 0.6, 0.3}, + {0.3, 0.6, 0.9, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 0.9, 0.6, 0.3}}}})); CHECK(approxEq<float, float>(*op->getInput(0)->grad(), *expectedInputGrad)); } @@ -1919,9 +2084,9 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { {1., 1., 1., 1., 1., 1., 1., 1.}}}})); auto weights = std::make_shared<Tensor>( Array3D<float, outChannels, inChannels, kernelSize>( - {{{{0.1000, 0.1000}, - {0.1000, 0.1000}, - {0.1000, 0.1000}}}})); + {{{{0.1, 0.1}, + {0.1, 0.1}, + {0.1, 0.1}}}})); auto biases = std::make_shared<Tensor>( Array1D<float, outChannels>({0.060000})); @@ -1955,55 +2120,13 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { SECTION("Input Grad") { auto expectedInputGrad = std::make_shared<Tensor>( Array3D<float, batchSize, inChannels, inDataSize>( - {{{{0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000}}, - - {{0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000, - 0.0000, - 0.1000, - 0.1000}}}})); + {{{{0.1, 0.1, 0.0, 0.1, 0.1, 0.0, 0.1, 0.1}, + {0.1, 0.1, 0.0, 0.1, 0.1, 0.0, 0.1, 0.1}, + {0.1, 0.1, 0.0, 0.1, 0.1, 0.0, 0.1, 0.1}}, + + {{0.1, 0.1, 0.0, 0.1, 0.1, 0.0, 0.1, 0.1}, + {0.1, 0.1, 0.0, 0.1, 0.1, 0.0, 0.1, 0.1}, + {0.1, 0.1, 0.0, 0.1, 0.1, 0.0, 0.1, 0.1}}}})); CHECK(approxEq<float, float>(*op->getInput(0)->grad(), *expectedInputGrad)); } @@ -2047,12 +2170,12 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { {1., 1., 1., 1., 1., 1., 1., 1.}}}})); auto weights = std::make_shared<Tensor>( Array3D<float, outChannels, 
inChannels, kernelSize>( - {{{{0.1000, 0.1000}, - {0.1000, 0.1000}, - {0.1000, 0.1000}}}})); + {{{{0.1, 0.1}, + {0.1, 0.1}, + {0.1, 0.1}}}})); auto biases = std::make_shared<Tensor>( - Array1D<float, outChannels>({0.060000})); + Array1D<float, outChannels>({0.06})); auto op = setupTestConv<DIM>( batchSize, @@ -2083,55 +2206,13 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { SECTION("Input Grad") { auto expectedInputGrad = std::make_shared<Tensor>( Array3D<float, batchSize, inChannels, inDataSize>( - {{{{0.1000, - 0.1000, - 0.2000, - 0.2000, - 0.2000, - 0.2000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.2000, - 0.2000, - 0.2000, - 0.2000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.2000, - 0.2000, - 0.2000, - 0.2000, - 0.1000, - 0.1000}}, - - {{0.1000, - 0.1000, - 0.2000, - 0.2000, - 0.2000, - 0.2000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.2000, - 0.2000, - 0.2000, - 0.2000, - 0.1000, - 0.1000}, - {0.1000, - 0.1000, - 0.2000, - 0.2000, - 0.2000, - 0.2000, - 0.1000, - 0.1000}}}})); + {{{{0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1}, + {0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1}, + {0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1}}, + + {{0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1}, + {0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1}, + {0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1}}}})); CHECK(approxEq<float, float>(*op->getInput(0)->grad(), *expectedInputGrad)); } @@ -2171,28 +2252,28 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}}}})); auto weights = std::make_shared<Tensor>( Array3D<float, outChannels, inChannels, kernelSize>( - {{{{0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}}, - - {{0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}}, - - {{0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}}, - - {{0.1000, 0.1000, 0.1000}, - 
{0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}, - {0.1000, 0.1000, 0.1000}}}})); + {{{{0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}}, + + {{0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}}, + + {{0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}}, + + {{0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}, + {0.1, 0.1, 0.1}}}})); auto biases = std::make_shared<Tensor>(Array1D<float, outChannels>( - {{0.0100, 0.0100, 0.0100, 0.0100}})); + {{0.01, 0.01, 0.01, 0.01}})); auto op = setupTestConv<DIM>( batchSize, @@ -2223,58 +2304,10 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { SECTION("Input Grad") { auto expectedInputGrad = std::make_shared<Tensor>( Array3D<float, batchSize, inChannels, inDataSize>( - {{{{0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.0000, - 0.0000}, - {0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.0000, - 0.0000}, - {0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.0000, - 0.0000}, - {0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.4000, - 0.4000, - 0.0000, - 0.0000, - 0.4000, - 0.0000, - 0.0000}}}})); + {{{{0.4,0.,0.,0.4,0.4,0.,0.4,0.4,0.,0.,0.4,0.,0.}, + {0.4,0.,0.,0.4,0.4,0.,0.4,0.4,0.,0.,0.4,0.,0.}, + {0.4,0.,0.,0.4,0.4,0.,0.4,0.4,0.,0.,0.4,0.,0.}, + {0.4,0.,0.,0.4,0.4,0.,0.4,0.4,0.,0.,0.4,0.,0.}}}})); CHECK(approxEq<float, float>(*op->getInput(0)->grad(), *expectedInputGrad)); } @@ -2334,9 +2367,9 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { {9., 10., 11., 12., 13., 14., 15., 16.}}}})); auto weights = std::make_shared<Tensor>( Array3D<float, outChannels, inChannels, kernelSize>( - {{{{0.1000, 0.2000, 0.3000}, {0.4000, 0.5000, 0.6000}}, + {{{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}, - {{0.7000, 0.8000, 0.9000}, {1.0000, 1.1000, 
1.2000}}}})); + {{0.7, 0.8, 0.9}, {1.0, 1.1, 1.2}}}})); auto biases = std::make_shared<Tensor>( Array1D<float, outChannels>({{0.0100, 0.0200}})); @@ -2367,22 +2400,8 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { SECTION("Input Grad") { auto expectedInputGrad = std::make_shared<Tensor>( Array3D<float, batchSize, inChannels, inDataSize>( - {{{{2.2000, - 0.0000, - 5.6000, - 0.0000, - 6.6000, - 0.0000, - 4.2000, - 0.0000}, - {3.4000, - 0.0000, - 8.6000, - 0.0000, - 9.6000, - 0.0000, - 6.0000, - 0.0000}}}})); + {{{{2.2,0.,5.6,0.,6.6,0.,4.2,0.}, + {3.4,0.,8.6,0.,9.6,0.,6.,0.}}}})); CHECK(approxEq<float, float>(*op->getInput(0)->grad(), *expectedInputGrad)); } @@ -2713,4 +2732,505 @@ TEST_CASE("[cpu/operator] Conv(backward)", "[Conv][CPU]") { } } } + SECTION("3D") { + constexpr DimSize_t DIM = 3; + SECTION("basic test, square kernel, stride, dilation") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 1; + constexpr DimSize_t outChannels = 1; + constexpr std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {4, 4, 4}; + + constexpr std::array<DimSize_t, DIM> stride = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> dilation = {2, 2, 2}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {1, 1, 1}; + + auto inputSize = std::vector<DimSize_t>({batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]}); + + auto input = std::make_shared<Tensor>( + Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>({{{{{{1., 2., 3., 4.}, + {5., 6., 7., 8.}, + {9., 10., 11., 12.}, + {13., 14., 15., 16.}}, + + {{17., 18., 19., 20.}, + {21., 22., 23., 24.}, + {25., 26., 27., 28.}, + {29., 30., 31., 32.}}, + + {{33., 34., 35., 36.}, + {37., 38., 39., 40.}, + {41., 42., 43., 44.}, + {45., 46., 47., 48.}}, + + {{49., 50., 51., 52.}, + {53., 54., 55., 56.}, + {57., 58., 59., 60.}, + {61., 
62., 63., 64.}}}}}})); + + auto weights = std::make_shared<Tensor>( + Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>({{{{{{0.1, 0.2}, {0.3, 0.4}}, + + {{0.5, 0.6}, {0.7, 0.8}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01}})); + + auto outputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{1.}}}}}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + //////////////////////////////////// + // setup gradients for backward + op->getOutput(0)->setGrad(outputGrad); + + REQUIRE_NOTHROW(op->backward()); + + SECTION("Input Grad") { + auto expectedInputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>({{{{{{0.1, 0.0, 0.2, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.3, 0.0, 0.4, 0.0}, + {0.0, 0.0, 0.0, 0.0}}, + + {{0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}}, + + {{0.5, 0.0, 0.6, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.7, 0.0, 0.8, 0.0}, + {0.0, 0.0, 0.0, 0.0}}, + + {{0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(0)->grad(), + *expectedInputGrad)); + } + SECTION("Weight grad") { + auto expectedWeightsGrad = std::make_shared<Tensor>( + Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>({{{{{{1., 3.}, {9., 11.}}, + + {{33., 35.}, {41., 43.}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(1)->grad(), + *expectedWeightsGrad)); + } + SECTION("Bias Grad") { + auto expectedBiasesGrad = std::make_shared<Tensor>( + Array1D<float, outChannels>({{1.}})); + CHECK(approxEq<float, float>(*op->getInput(2)->grad(), + *expectedBiasesGrad)); + 
} + } + SECTION("square kernel, multiple in/out channels") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 2; + constexpr DimSize_t outChannels = 1; + constexpr std::array<DimSize_t, DIM> kernelSize = {2, 2, 2}; + constexpr std::array<DimSize_t, DIM> inDataSize = {2, 2, 2}; + + constexpr std::array<DimSize_t, DIM> stride = {1, 1, 1}; + constexpr std::array<DimSize_t, DIM> dilation = {1, 1, 1}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {1, 1, 1}; + + auto inputSize = std::vector<DimSize_t>({batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]}); + + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1.000000, 2.000000}, {3.000000, 4.000000}}, + + {{5.000000, 6.000000}, {7.000000, 8.000000}}}, + + {{{9.000000, 10.000000}, {11.000000, 12.000000}}, + + {{13.000000, 14.000000}, {15.000000, 16.000000}}}}}})); + + auto weights = std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{0.100000, 0.200000}, {0.300000, 0.400000}}, + + {{0.500000, 0.600000}, {0.700000, 0.800000}}}, + + {{{0.900000, 1.000000}, {1.100000, 1.200000}}, + + {{1.300000, 1.400000}, {1.500000, 1.600000}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.010000}})); + + auto outputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{1.000000}}}}}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + //////////////////////////////////// + // setup gradients for backward + op->getOutput(0)->setGrad(outputGrad); + + REQUIRE_NOTHROW(op->backward()); + + SECTION("Input Grad") { + auto 
expectedInputGrad = + std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{0.100000, 0.200000}, {0.300000, 0.400000}}, + + {{0.500000, 0.600000}, {0.700000, 0.800000}}}, + + {{{0.900000, 1.000000}, {1.100000, 1.200000}}, + + {{1.300000, 1.400000}, {1.500000, 1.600000}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(0)->grad(), + *expectedInputGrad)); + } + SECTION("Weight grad") { + auto expectedWeightsGrad = + std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{1.000000, 2.000000}, {3.000000, 4.000000}}, + + {{5.000000, 6.000000}, {7.000000, 8.000000}}}, + + {{{9.000000, 10.000000}, {11.000000, 12.000000}}, + + {{13.000000, 14.000000}, + {15.000000, 16.000000}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(1)->grad(), + *expectedWeightsGrad)); + } + SECTION("Bias Grad") { + auto expectedBiasesGrad = std::make_shared<Tensor>( + Array1D<float, outChannels>({{1.000000}})); + CHECK(approxEq<float, float>(*op->getInput(2)->grad(), + *expectedBiasesGrad)); + } + } + SECTION("non square kernel, stride, dilation, multiple " + "in/outchannels") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 2; + constexpr DimSize_t outChannels = 2; + constexpr std::array<DimSize_t, DIM> kernelSize = {1, 2, 3}; + constexpr std::array<DimSize_t, DIM> inDataSize = {5, 5, 5}; + + constexpr std::array<DimSize_t, DIM> stride = {1, 2, 3}; + constexpr std::array<DimSize_t, DIM> dilation = {3, 2, 1}; + constexpr std::array<DimSize_t, 2 * DIM> padding({0, 0}); + + constexpr std::array<DimSize_t, DIM> outDataSize = {5, 2, 1}; + + auto inputSize = std::vector<DimSize_t>({batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]}); + + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1., 2., 3., 4., 5.}, + {6., 
7., 8., 9., 10.}, + {11., 12., 13., 14., 15.}, + {16., 17., 18., 19., 20.}, + {21., 22., 23., 24., 25.}}, + + {{26., 27., 28., 29., 30.}, + {31., 32., 33., 34., 35.}, + {36., 37., 38., 39., 40.}, + {41., 42., 43., 44., 45.}, + {46., 47., 48., 49., 50.}}, + + {{51., 52., 53., 54., 55.}, + {56., 57., 58., 59., 60.}, + {61., 62., 63., 64., 65.}, + {66., 67., 68., 69., 70.}, + {71., 72., 73., 74., 75.}}, + + {{76., 77., 78., 79., 80.}, + {81., 82., 83., 84., 85.}, + {86., 87., 88., 89., 90.}, + {91., 92., 93., 94., 95.}, + {96., 97., 98., 99., 100.}}, + + {{101., 102., 103., 104., 105.}, + {106., 107., 108., 109., 110.}, + {111., 112., 113., 114., 115.}, + {116., 117., 118., 119., 120.}, + {121., 122., 123., 124., 125.}}}, + + {{{126., 127., 128., 129., 130.}, + {131., 132., 133., 134., 135.}, + {136., 137., 138., 139., 140.}, + {141., 142., 143., 144., 145.}, + {146., 147., 148., 149., 150.}}, + + {{151., 152., 153., 154., 155.}, + {156., 157., 158., 159., 160.}, + {161., 162., 163., 164., 165.}, + {166., 167., 168., 169., 170.}, + {171., 172., 173., 174., 175.}}, + + {{176., 177., 178., 179., 180.}, + {181., 182., 183., 184., 185.}, + {186., 187., 188., 189., 190.}, + {191., 192., 193., 194., 195.}, + {196., 197., 198., 199., 200.}}, + + {{201., 202., 203., 204., 205.}, + {206., 207., 208., 209., 210.}, + {211., 212., 213., 214., 215.}, + {216., 217., 218., 219., 220.}, + {221., 222., 223., 224., 225.}}, + + {{226., 227., 228., 229., 230.}, + {231., 232., 233., 234., 235.}, + {236., 237., 238., 239., 240.}, + {241., 242., 243., 244., 245.}, + {246., 247., 248., 249., 250.}}}}}})); + + auto weights = std::make_shared<Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}}, + + {{{0.7, 0.8, 0.9}, {1.0, 1.1, 1.2}}}}, + + {{{{1.3, 1.4, 1.5}, {1.6, 1.7, 1.8}}}, + + {{{1.9, 2.0, 2.1}, {2.2, 2.3, 2.4}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, 
outChannels>({{0.01, 0.02}})); + + auto outputGrad = std::make_shared<Tensor>( + Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{1.}, {2.}}, + + {{3.}, {4.}}, + + {{5.}, {6.}}, + + {{7.}, {8.}}, + + {{9.}, {10.}}}, + + {{{11.}, {12.}}, + + {{13.}, {14.}}, + + {{15.}, {16.}}, + + {{17.}, {18.}}, + + {{19.}, {20.}}}}}})); + + auto op = setupTestConv<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + padding, + input, + weights, + biases); + + //////////////////////////////////// + // setup gradients for backward + op->getOutput(0)->setGrad(outputGrad); + + REQUIRE_NOTHROW(op->backward()); + + SECTION("Input Grad") { + auto expectedInputGrad = + std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{14.400001, 15.599999, 16.799999, 0., 0.}, + {0., 0., 0., 0., 0.}, + {33.800003, 36.400002, 39., 0., 0.}, + {0., 0., 0., 0., 0.}, + {20., 21.400002, 22.800001, 0., 0.}}, + + {{17.200001, 18.799999, 20.400000, 0., 0.}, + {0., 0., 0., 0., 0.}, + {40.599998, 44., 47.400002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {24., 25.800001, 27.600000, 0., 0.}}, + + {{20.000002, 22., 24., 0., 0.}, + {0., 0., 0., 0., 0.}, + {47.400002, 51.599998, 55.800003, 0., 0.}, + {0., 0., 0., 0., 0.}, + {28., 30.200001, 32.400002, 0., 0.}}, + + {{22.800001, 25.199999, 27.600000, 0., 0.}, + {0., 0., 0., 0., 0.}, + {54.200001, 59.200001, 64.200005, 0., 0.}, + {0., 0., 0., 0., 0.}, + {32., 34.600002, 37.200001, 0., 0.}}, + + {{25.600002, 28.400000, 31.200001, 0., 0.}, + {0., 0., 0., 0., 0.}, + {61., 66.800003, 72.600006, 0., 0.}, + {0., 0., 0., 0., 0.}, + {36., 39., 42., 0., 0.}}}, + + {{{21.600000, 22.799999, 24.000002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {49.400002, 52., 54.600002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {28.400002, 29.799999, 31.200001, 0., 0.}}, + + {{26.799999, 28.400000, 30.000002, 0., 0.}, + {0., 0., 0., 0., 0.}, + {61., 
64.400002, 67.800003, 0., 0.}, + {0., 0., 0., 0., 0.}, + {34.799999, 36.599998, 38.400002, 0., 0.}}, + + {{32., 34., 36.000004, 0., 0.}, + {0., 0., 0., 0., 0.}, + {72.599998, 76.800003, 81., 0., 0.}, + {0., 0., 0., 0., 0.}, + {41.200001, 43.400002, 45.600002, 0., 0.}}, + + {{37.200001, 39.599998, 42.000004, 0., 0.}, + {0., 0., 0., 0., 0.}, + {84.199997, 89.199997, 94.200005, 0., 0.}, + {0., 0., 0., 0., 0.}, + {47.600002, 50.200001, 52.800003, 0., 0.}}, + + {{42.399998, 45.200001, 48.000004, 0., 0.}, + {0., 0., 0., 0., 0.}, + {95.800003, 101.599998, 107.400009, 0., 0.}, + {0., 0., 0., 0., 0.}, + {54., 57., 60., 0., 0.}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(0)->grad(), + *expectedInputGrad)); + } + SECTION("Weight grad") { + auto expectedWeightsGrad = std::make_shared< + Tensor>(Array5D<float, + outChannels, + inChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{4105., 4160., 4215.}, {4655., 4710., 4765.}}}, + + {{{10980., 11035., 11090.}, {11530., 11585., 11640.}}}}, + + {{{{9705., 9860., 10015.}, {11255., 11410., 11565.}}}, + + {{{29080., 29235., 29390.}, + {30630., 30785., 30940.}}}}}})); + CHECK(approxEq<float, float>(*op->getInput(1)->grad(), + *expectedWeightsGrad)); + } + SECTION("Bias Grad") { + auto expectedBiasesGrad = std::make_shared<Tensor>( + Array1D<float, outChannels>({{55., 155.}})); + CHECK(approxEq<float, float>(*op->getInput(2)->grad(), + *expectedBiasesGrad)); + } + } + } } + +} // namespace Aidge diff --git a/unit_tests/operator/Test_ConvTranspose.cpp b/unit_tests/operator/Test_ConvTranspose.cpp index 6e889e809e0a05d551829bd15fda9cc651068465..7bb87835a3d9210b7f2f6bce682df60657d049a7 100644 --- a/unit_tests/operator/Test_ConvTranspose.cpp +++ b/unit_tests/operator/Test_ConvTranspose.cpp @@ -2293,6 +2293,1332 @@ TEST_CASE("[cpu/operator] ConvTranspose(forward)", "[ConvTranspose][CPU]") { CHECK(approxEq<float, float>(*op->getOutput(0), *expectedOutput)); } } + SECTION("3D") { + constexpr DimSize_t DIM = 3; + 
SECTION("Big test to ensure kernel capabilities") { + constexpr DimSize_t batchSize = 1; + constexpr DimSize_t inChannels = 3; + constexpr DimSize_t outChannels = 2; + + constexpr std::array<DimSize_t, DIM> kernelSize{1, 2, 3}; + + constexpr std::array<DimSize_t, DIM> inDataSize{4, 4, 5}; + constexpr std::array<DimSize_t, DIM> outDataSize{4, 10, 15}; + + constexpr std::array<DimSize_t, DIM> stride{1, 2, 3}; + constexpr std::array<DimSize_t, DIM> dilation{2, 3, 1}; + + auto input = std::make_shared<Tensor>(Array5D<float, + batchSize, + inChannels, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{1., 2., 3., 4., 5.}, + {6., 7., 8., 9., 10.}, + {11., 12., 13., 14., 15.}, + {16., 17., 18., 19., 20.}}, + + {{21., 22., 23., 24., 25.}, + {26., 27., 28., 29., 30.}, + {31., 32., 33., 34., 35.}, + {36., 37., 38., 39., 40.}}, + + {{41., 42., 43., 44., 45.}, + {46., 47., 48., 49., 50.}, + {51., 52., 53., 54., 55.}, + {56., 57., 58., 59., 60.}}, + + {{61., 62., 63., 64., 65.}, + {66., 67., 68., 69., 70.}, + {71., 72., 73., 74., 75.}, + {76., 77., 78., 79., 80.}}}, + + {{{81., 82., 83., 84., 85.}, + {86., 87., 88., 89., 90.}, + {91., 92., 93., 94., 95.}, + {96., 97., 98., 99., 100.}}, + + {{101., 102., 103., 104., 105.}, + {106., 107., 108., 109., 110.}, + {111., 112., 113., 114., 115.}, + {116., 117., 118., 119., 120.}}, + + {{121., 122., 123., 124., 125.}, + {126., 127., 128., 129., 130.}, + {131., 132., 133., 134., 135.}, + {136., 137., 138., 139., 140.}}, + + {{141., 142., 143., 144., 145.}, + {146., 147., 148., 149., 150.}, + {151., 152., 153., 154., 155.}, + {156., 157., 158., 159., 160.}}}, + + {{{161., 162., 163., 164., 165.}, + {166., 167., 168., 169., 170.}, + {171., 172., 173., 174., 175.}, + {176., 177., 178., 179., 180.}}, + + {{181., 182., 183., 184., 185.}, + {186., 187., 188., 189., 190.}, + {191., 192., 193., 194., 195.}, + {196., 197., 198., 199., 200.}}, + + {{201., 202., 203., 204., 205.}, + {206., 207., 208., 209., 210.}, + {211., 212., 213., 
214., 215.}, + {216., 217., 218., 219., 220.}}, + + {{221., 222., 223., 224., 225.}, + {226., 227., 228., 229., 230.}, + {231., 232., 233., 234., 235.}, + {236., 237., 238., 239., 240.}}}}}})); + + auto weights = std::make_shared<Tensor>(Array5D<float, + inChannels, + outChannels, + kernelSize[0], + kernelSize[1], + kernelSize[2]>( + {{{{{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}}, + + {{{0.7, 0.8, 0.9}, {1.0, 1.1, 1.2}}}}, + + {{{{1.3, 1.4, 1.5}, {1.6, 1.7, 1.8}}}, + + {{{1.9, 2.0, 2.1}, {2.2, 2.3, 2.4}}}}}})); + + auto biases = std::make_shared<Tensor>( + Array1D<float, outChannels>({{0.01, 0.02}})); + + auto op = setupTestConvTranspose<DIM>(batchSize, + inChannels, + outChannels, + kernelSize, + inDataSize, + stride, + dilation, + input, + weights, + biases); + + REQUIRE_NOTHROW(op->forward()); + + auto expectedOutput = std::make_shared<Tensor>( + Array5D<float, + batchSize, + outChannels, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{507.910034, + 532.210022, + 556.510010, + 511.809998, + 536.410034, + 561.010010, + 515.710022, + 540.610046, + 565.510010, + 519.609985, + 544.810059, + 570.010010, + 523.510010, + 549.010010, + 574.510010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {527.410034, + 553.210022, + 579.010010, + 531.309998, + 557.410034, + 583.510010, + 535.210022, + 561.610046, + 588.010010, + 539.109985, + 565.810059, + 592.510010, + 543.010010, + 570.010010, + 597.010010}, + {580.809998, + 605.110046, + 629.410034, + 585.609985, + 610.210022, + 634.809998, + 590.409973, + 615.310059, + 640.210022, + 595.210022, + 620.410034, + 645.609985, + 600.010010, + 625.510010, + 651.010010}, + {546.910034, + 574.210022, + 601.510010, + 550.809998, + 578.410034, + 606.010010, + 554.710022, + 582.610046, + 610.510010, + 558.609985, + 586.810059, + 615.010010, + 562.510010, + 591.010010, + 619.510010}, + {604.809998, 
+ 630.610046, + 656.410034, + 609.609985, + 635.710022, + 661.809998, + 614.409973, + 640.810059, + 667.210022, + 619.210022, + 645.910034, + 672.609985, + 624.010010, + 651.010010, + 678.010010}, + {566.410034, + 595.210022, + 624.010010, + 570.309998, + 599.410034, + 628.510010, + 574.210022, + 603.610046, + 633.010010, + 578.109985, + 607.810059, + 637.510010, + 582.010010, + 612.010010, + 642.010010}, + {628.809998, + 656.110046, + 683.410034, + 633.609985, + 661.210022, + 688.809998, + 638.409973, + 666.310059, + 694.210022, + 643.210022, + 671.410034, + 699.609985, + 648.010010, + 676.510010, + 705.010010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {652.809998, + 681.610046, + 710.410034, + 657.609985, + 686.710022, + 715.809998, + 662.409973, + 691.810059, + 721.210022, + 667.210022, + 696.910034, + 726.609985, + 672.010010, + 702.010010, + 732.010010}}, + + {{585.910034, + 616.210022, + 646.510010, + 589.809998, + 620.410034, + 651.010010, + 593.710022, + 624.610046, + 655.510010, + 597.609985, + 628.810059, + 660.010010, + 601.510010, + 633.010010, + 664.510010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {605.410034, + 637.210022, + 669.010010, + 609.309998, + 641.410034, + 673.510010, + 613.210022, + 645.610046, + 678.010010, + 617.109985, + 649.810059, + 682.510010, + 621.010010, + 654.010010, + 687.010010}, + {676.809998, + 707.110046, + 737.410034, + 681.609985, + 712.210022, + 742.809998, + 686.409973, + 717.310059, + 748.210022, + 691.210022, + 722.410034, + 753.609985, + 696.010010, + 727.510010, + 759.010010}, + {624.910034, + 658.210022, + 691.510010, + 628.809998, + 662.410034, + 696.010010, + 632.710022, + 666.610046, + 700.510010, + 636.609985, + 670.810059, + 
705.010010, + 640.510010, + 675.010010, + 709.510010}, + {700.809998, + 732.610046, + 764.410034, + 705.609985, + 737.710022, + 769.809998, + 710.409973, + 742.810059, + 775.210022, + 715.210022, + 747.910034, + 780.609985, + 720.010010, + 753.010010, + 786.010010}, + {644.410034, + 679.210022, + 714.010010, + 648.309998, + 683.410034, + 718.510010, + 652.210022, + 687.610046, + 723.010010, + 656.109985, + 691.810059, + 727.510010, + 660.010010, + 696.010010, + 732.010010}, + {724.809998, + 758.110046, + 791.410034, + 729.609985, + 763.210022, + 796.809998, + 734.409973, + 768.310059, + 802.210022, + 739.210022, + 773.410034, + 807.609985, + 744.010010, + 778.510010, + 813.010010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {748.809998, + 783.610046, + 818.410034, + 753.609985, + 788.710022, + 823.809998, + 758.409973, + 793.810059, + 829.210022, + 763.210022, + 798.910034, + 834.609985, + 768.010010, + 804.010010, + 840.010010}}, + + {{663.910034, + 700.210022, + 736.510010, + 667.809998, + 704.410034, + 741.010010, + 671.710022, + 708.610046, + 745.510010, + 675.609985, + 712.810059, + 750.010010, + 679.510010, + 717.010010, + 754.510010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {683.410034, + 721.210022, + 759.010010, + 687.309998, + 725.410034, + 763.510010, + 691.210022, + 729.610046, + 768.010010, + 695.109985, + 733.810059, + 772.510010, + 699.010010, + 738.010010, + 777.010010}, + {772.809998, + 809.110046, + 845.410034, + 777.609985, + 814.210022, + 850.809998, + 782.409973, + 819.310059, + 856.210022, + 787.210022, + 824.410034, + 861.609985, + 792.010010, + 829.510010, + 867.010010}, + {702.910034, + 742.210022, + 781.510010, + 706.809998, + 746.410034, + 786.010010, + 
710.710022, + 750.610046, + 790.510010, + 714.609985, + 754.810059, + 795.010010, + 718.510010, + 759.010071, + 799.510010}, + {796.809998, + 834.610046, + 872.410034, + 801.609985, + 839.710022, + 877.810059, + 806.409973, + 844.810059, + 883.210022, + 811.210022, + 849.910034, + 888.609985, + 816.010010, + 855.010010, + 894.010010}, + {722.410034, + 763.210022, + 804.010010, + 726.309998, + 767.410034, + 808.510010, + 730.210022, + 771.610046, + 813.010010, + 734.109985, + 775.810059, + 817.510010, + 738.010010, + 780.010071, + 822.010010}, + {820.809998, + 860.110046, + 899.410034, + 825.609985, + 865.210022, + 904.810059, + 830.409973, + 870.310059, + 910.210022, + 835.210022, + 875.410034, + 915.609985, + 840.010010, + 880.510010, + 921.010010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {844.809998, + 885.610046, + 926.410034, + 849.609985, + 890.710022, + 931.810059, + 854.409973, + 895.810059, + 937.210022, + 859.210022, + 900.910034, + 942.609985, + 864.010010, + 906.010010, + 948.010010}}, + + {{741.910034, + 784.210022, + 826.510010, + 745.809998, + 788.410034, + 831.010010, + 749.710022, + 792.610046, + 835.510010, + 753.609985, + 796.810059, + 840.010010, + 757.510010, + 801.010071, + 844.510010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {761.410034, + 805.210022, + 849.010010, + 765.310059, + 809.410034, + 853.510010, + 769.210022, + 813.610046, + 858.010010, + 773.109985, + 817.810059, + 862.510010, + 777.010010, + 822.010071, + 867.010010}, + {868.809998, + 911.110046, + 953.410034, + 873.609985, + 916.210022, + 958.810059, + 878.409973, + 921.310059, + 964.210022, + 883.210022, + 926.410034, + 969.609985, + 888.010010, + 931.510010, + 975.010010}, + {780.910034, + 
826.210022, + 871.510010, + 784.810059, + 830.410034, + 876.010010, + 788.710022, + 834.610046, + 880.510010, + 792.609985, + 838.810059, + 885.010010, + 796.510010, + 843.010071, + 889.510010}, + {892.809998, + 936.610046, + 980.410034, + 897.609985, + 941.710022, + 985.810059, + 902.409973, + 946.810059, + 991.210022, + 907.210022, + 951.910034, + 996.609985, + 912.010010, + 957.010010, + 1002.010010}, + {800.410034, + 847.210022, + 894.010010, + 804.310059, + 851.410034, + 898.510010, + 808.210022, + 855.610046, + 903.010010, + 812.109985, + 859.810059, + 907.510010, + 816.010010, + 864.010071, + 912.010010}, + {916.809998, + 962.110046, + 1007.410034, + 921.609985, + 967.210022, + 1012.810059, + 926.409973, + 972.310059, + 1018.210022, + 931.210022, + 977.410034, + 1023.609985, + 936.010010, + 982.510010, + 1029.010010}, + {0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000, + 0.010000}, + {940.809998, + 987.610046, + 1034.410034, + 945.609985, + 992.710022, + 1039.810059, + 950.409973, + 997.810059, + 1045.209961, + 955.210022, + 1002.910034, + 1050.609985, + 960.010010, + 1008.010010, + 1056.010010}}}, + + {{{653.720032, + 678.020020, + 702.320007, + 659.420044, + 684.020020, + 708.620056, + 665.120056, + 690.020020, + 714.920044, + 670.820007, + 696.020020, + 721.220032, + 676.520020, + 702.020020, + 727.520020}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {682.220032, + 708.020020, + 733.820007, + 687.920044, + 714.020020, + 740.120056, + 693.620056, + 720.020020, + 746.420044, + 699.320007, + 726.020020, + 752.720032, + 705.020020, + 732.020020, + 759.020020}, + {726.620056, + 750.920044, + 775.220032, + 733.220032, + 757.820007, + 782.420044, + 739.820068, + 764.720032, + 789.620056, + 746.420044, + 
771.619995, + 796.820068, + 753.020020, + 778.520020, + 804.020081}, + {710.720032, + 738.020020, + 765.320007, + 716.420044, + 744.020020, + 771.620056, + 722.120056, + 750.020020, + 777.920044, + 727.820068, + 756.020020, + 784.220032, + 733.520020, + 762.020020, + 790.520020}, + {759.620056, + 785.420044, + 811.220032, + 766.220032, + 792.320007, + 818.420044, + 772.820068, + 799.220032, + 825.620056, + 779.420044, + 806.119995, + 832.820068, + 786.020020, + 813.020020, + 840.020081}, + {739.220032, + 768.020020, + 796.820007, + 744.920044, + 774.020020, + 803.120056, + 750.620056, + 780.020020, + 809.420044, + 756.320068, + 786.020020, + 815.720032, + 762.020020, + 792.020020, + 822.020020}, + {792.620056, + 819.920044, + 847.220032, + 799.220032, + 826.820007, + 854.420044, + 805.820068, + 833.720032, + 861.620056, + 812.420044, + 840.619995, + 868.820068, + 819.020020, + 847.520020, + 876.020081}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {825.620056, + 854.420044, + 883.220032, + 832.220032, + 861.320007, + 890.420044, + 838.820068, + 868.220032, + 897.620056, + 845.420044, + 875.119995, + 904.820068, + 852.020020, + 882.020020, + 912.020020}}, + + {{767.720032, + 798.020020, + 828.320007, + 773.420044, + 804.020020, + 834.620056, + 779.120056, + 810.020020, + 840.920044, + 784.820068, + 816.020020, + 847.220032, + 790.520020, + 822.020020, + 853.520020}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {796.220032, + 828.020020, + 859.820007, + 801.920044, + 834.020020, + 866.120056, + 807.620056, + 840.020020, + 872.420044, + 813.320068, + 846.020020, + 878.720032, + 819.020020, + 852.020020, + 885.020020}, + {858.620056, + 888.920044, + 919.220032, + 865.220032, + 895.820007, + 
926.420044, + 871.820068, + 902.719971, + 933.620056, + 878.420044, + 909.619995, + 940.820068, + 885.020020, + 916.520020, + 948.020020}, + {824.720032, + 858.020020, + 891.320068, + 830.420044, + 864.020020, + 897.620056, + 836.120056, + 870.020020, + 903.920044, + 841.820068, + 876.020020, + 910.220032, + 847.520020, + 882.020020, + 916.520020}, + {891.620056, + 923.420044, + 955.220032, + 898.220032, + 930.320007, + 962.420044, + 904.820068, + 937.219971, + 969.620056, + 911.420044, + 944.119995, + 976.820068, + 918.020020, + 951.020020, + 984.020020}, + {853.220032, + 888.020020, + 922.820068, + 858.920044, + 894.020020, + 929.120056, + 864.620056, + 900.020020, + 935.420044, + 870.320068, + 906.020020, + 941.720032, + 876.020020, + 912.020020, + 948.020020}, + {924.620056, + 957.920044, + 991.220032, + 931.220032, + 964.820007, + 998.420044, + 937.820068, + 971.719971, + 1005.620056, + 944.420044, + 978.619995, + 1012.820068, + 951.020020, + 985.520020, + 1020.020020}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {957.620056, + 992.420044, + 1027.220093, + 964.220032, + 999.320007, + 1034.420044, + 970.820068, + 1006.219971, + 1041.620117, + 977.420044, + 1013.119995, + 1048.820068, + 984.020020, + 1020.020020, + 1056.020020}}, + + {{881.720032, + 918.020020, + 954.320068, + 887.420044, + 924.020020, + 960.620056, + 893.120056, + 930.020020, + 966.920044, + 898.820068, + 936.020020, + 973.220032, + 904.520020, + 942.020020, + 979.520020}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {910.220032, + 948.020020, + 985.820068, + 915.920044, + 954.020020, + 992.120056, + 921.620056, + 960.020020, + 998.420044, + 927.320068, + 966.020020, + 1004.720032, + 933.020020, + 972.020020, + 
1011.020020}, + {990.620056, + 1026.920044, + 1063.220093, + 997.220032, + 1033.820068, + 1070.420044, + 1003.820068, + 1040.719971, + 1077.620117, + 1010.420044, + 1047.619995, + 1084.820068, + 1017.020020, + 1054.520020, + 1092.020020}, + {938.720032, + 978.020020, + 1017.320068, + 944.420044, + 984.020020, + 1023.620056, + 950.120056, + 990.020020, + 1029.920044, + 955.820068, + 996.020020, + 1036.220093, + 961.520081, + 1002.020020, + 1042.520020}, + {1023.620056, + 1061.420044, + 1099.220093, + 1030.220093, + 1068.320068, + 1106.420044, + 1036.820068, + 1075.219971, + 1113.620117, + 1043.420044, + 1082.119995, + 1120.820068, + 1050.020020, + 1089.020020, + 1128.020020}, + {967.220032, + 1008.020020, + 1048.820068, + 972.920044, + 1014.020020, + 1055.119995, + 978.620056, + 1020.020020, + 1061.420044, + 984.320068, + 1026.020020, + 1067.720093, + 990.020081, + 1032.020020, + 1074.020020}, + {1056.619995, + 1095.920044, + 1135.220093, + 1063.220093, + 1102.820068, + 1142.420044, + 1069.820068, + 1109.719971, + 1149.620117, + 1076.420044, + 1116.619995, + 1156.820068, + 1083.020020, + 1123.520020, + 1164.020020}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {1089.619995, + 1130.420044, + 1171.220093, + 1096.220093, + 1137.320068, + 1178.420044, + 1102.820068, + 1144.219971, + 1185.620117, + 1109.420044, + 1151.119995, + 1192.820068, + 1116.020020, + 1158.020020, + 1200.020020}}, + + {{995.720032, + 1038.020020, + 1080.320068, + 1001.420044, + 1044.020020, + 1086.619995, + 1007.120056, + 1050.020020, + 1092.920044, + 1012.820068, + 1056.020020, + 1099.220093, + 1018.520081, + 1062.020020, + 1105.520020}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {1024.220093, + 1068.020020, + 1111.820068, 
+ 1029.920044, + 1074.020020, + 1118.119995, + 1035.619995, + 1080.020020, + 1124.420044, + 1041.320068, + 1086.020020, + 1130.720093, + 1047.020020, + 1092.020020, + 1137.020020}, + {1122.619995, + 1164.920044, + 1207.220093, + 1129.220093, + 1171.820068, + 1214.420044, + 1135.820068, + 1178.719971, + 1221.620117, + 1142.420044, + 1185.619995, + 1228.820068, + 1149.020020, + 1192.520020, + 1236.020020}, + {1052.720093, + 1098.020020, + 1143.320068, + 1058.420044, + 1104.020020, + 1149.619995, + 1064.119995, + 1110.020020, + 1155.920044, + 1069.820068, + 1116.020020, + 1162.220093, + 1075.520020, + 1122.020020, + 1168.520020}, + {1155.619995, + 1199.420044, + 1243.220093, + 1162.220093, + 1206.320068, + 1250.420044, + 1168.820068, + 1213.219971, + 1257.620117, + 1175.420044, + 1220.119995, + 1264.820068, + 1182.020020, + 1227.020020, + 1272.020020}, + {1081.220093, + 1128.020020, + 1174.820068, + 1086.920044, + 1134.020020, + 1181.119995, + 1092.619995, + 1140.020020, + 1187.420044, + 1098.320068, + 1146.020020, + 1193.720093, + 1104.020020, + 1152.020020, + 1200.020020}, + {1188.619995, + 1233.920044, + 1279.220093, + 1195.220093, + 1240.820068, + 1286.420044, + 1201.820068, + 1247.719971, + 1293.620117, + 1208.420044, + 1254.619995, + 1300.820068, + 1215.020020, + 1261.520020, + 1308.020020}, + {0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000, + 0.020000}, + {1221.619995, + 1268.420044, + 1315.220093, + 1228.220093, + 1275.320068, + 1322.420044, + 1234.820068, + 1282.219971, + 1329.620117, + 1241.420044, + 1289.119995, + 1336.820068, + 1248.020020, + 1296.020020, + 1344.020020}}}}}})); + } + } } } // namespace Aidge diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp index f7823d022c8d3b228740a3df3f1d01224cd346c6..5efc5bd36fb582cb2cf27ba739064fbd410a0751 100644 --- a/unit_tests/operator/Test_PadImpl.cpp +++ 
b/unit_tests/operator/Test_PadImpl.cpp @@ -9,6 +9,9 @@ * ********************************************************************************/ +#include <aidge/utils/ArrayHelpers.hpp> +#include <aidge/utils/TensorUtils.hpp> +#include <aidge/utils/Types.h> #include <memory> #include <catch2/catch_test_macros.hpp> @@ -22,550 +25,694 @@ using namespace Aidge; -TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { - SECTION("Symmetric Pad") { - const int pv = 0; // pad value - - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 0, 1, 2, 3, 4, pv}, - { pv, 5, 6, 7, 8, 9, pv}, - { pv, 10, 11, 12, 13, 14, pv}, - { pv, 15, 16, 17, 18, 19, pv}, - { pv, 20, 21, 22, 23, 24, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 25, 26, 27, 28, 29, pv}, - { pv, 30, 31, 32, 
33, 34, pv}, - { pv, 35, 36, 37, 38, 39, pv}, - { pv, 40, 41, 42, 43, 44, pv}, - { pv, 45, 46, 47, 48, 49, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 50, 51, 52, 53, 54, pv}, - { pv, 55, 56, 57, 58, 59, pv}, - { pv, 60, 61, 62, 63, 64, pv}, - { pv, 65, 66, 67, 68, 69, pv}, - { pv, 70, 71, 72, 73, 74, pv}, - { pv, pv, pv, pv, pv, pv, pv}} - }, - { - {{ pv, pv, pv, pv, pv, pv, pv}, - { pv, 75, 76, 77, 78, 79, pv}, - { pv, 80, 81, 82, 83, 84, pv}, - { pv, 85, 86, 87, 88, 89, pv}, - { pv, 90, 91, 92, 93, 94, pv}, - { pv, 95, 96, 97, 98, 99, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - {pv, 100, 101, 102, 103, 104, pv}, - {pv, 105, 106, 107, 108, 109, pv}, - {pv, 110, 111, 112, 113, 114, pv}, - {pv, 115, 116, 117, 118, 119, pv}, - {pv, 120, 121, 122, 123, 124, pv}, - { pv, pv, pv, pv, pv, pv, pv}}, - - {{ pv, pv, pv, pv, pv, pv, pv}, - {pv, 125, 126, 127, 128, 129, pv}, - {pv, 130, 131, 132, 133, 134, pv}, - {pv, 135, 136, 137, 138, 139, pv}, - {pv, 140, 141, 142, 143, 144, pv}, - {pv, 145, 146, 147, 148, 149, pv}, - { pv, pv, pv, pv, pv, pv, pv}} - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); - myPad->getOperator()->setDataType(DataType::Int32); - myPad->getOperator()->setBackend("cpu"); - myPad->forward(); - // myPad->getOperator()->getOutput(0)->print(); - REQUIRE(*(op->getOutput(0)) == *myOutput); - } +template <DimSize_t DIM> +static std::shared_ptr<OperatorTensor> +setupTestPad(std::array<DimSize_t, 2 * DIM> beginEndBorder, + const std::shared_ptr<Tensor> input, + PadBorderType padType, + double borderValue) { + input->setBackend("cpu"); + std::shared_ptr<Node> padNode = + Pad<DIM>(beginEndBorder, "pad_op", padType, borderValue); + auto op = std::static_pointer_cast<OperatorTensor>(padNode->getOperator()); - SECTION("Asymmetric Pad") { - const int pv = 0; // pad value - - std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, 
static_cast<double>(pv)); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,6,6> { //NCHW - { - { - {{ pv, pv, pv, pv, pv, pv}, - { 0, 1, 2, 3, 4, pv}, - { 5, 6, 7, 8, 9, pv}, - { 10, 11, 12, 13, 14, pv}, - { 15, 16, 17, 18, 19, pv}, - { 20, 21, 22, 23, 24, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 25, 26, 27, 28, 29, pv}, - { 30, 31, 32, 33, 34, pv}, - { 35, 36, 37, 38, 39, pv}, - { 40, 41, 42, 43, 44, pv}, - { 45, 46, 47, 48, 49, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 50, 51, 52, 53, 54, pv}, - { 55, 56, 57, 58, 59, pv}, - { 60, 61, 62, 63, 64, pv}, - { 65, 66, 67, 68, 69, pv}, - { 70, 71, 72, 73, 74, pv}} - }, - { - {{ pv, pv, pv, pv, pv, pv}, - { 75, 76, 77, 78, 79, pv}, - { 80, 81, 82, 83, 84, pv}, - { 85, 86, 87, 88, 89, pv}, - { 90, 91, 92, 93, 94, pv}, - { 95, 96, 97, 98, 99, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 100, 101, 102, 103, 104, pv}, - { 105, 106, 107, 108, 109, pv}, - { 110, 111, 112, 113, 114, pv}, - { 115, 116, 117, 118, 119, pv}, - 
{ 120, 121, 122, 123, 124, pv}}, - - {{ pv, pv, pv, pv, pv, pv}, - { 125, 126, 127, 128, 129, pv}, - { 130, 131, 132, 133, 134, pv}, - { 135, 136, 137, 138, 139, pv}, - { 140, 141, 142, 143, 144, pv}, - { 145, 146, 147, 148, 149, pv}} - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); - myPad->getOperator()->setDataType(DataType::Int32); - myPad->getOperator()->setBackend("cpu"); - myPad->forward(); - // myPad->getOperator()->getOutput(0)->print(); - REQUIRE(*(op->getOutput(0)) == *myOutput); - } + op->setDataType(DataType::Float32); + op->setBackend("cpu"); - SECTION("Pad Edge") { - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - {{ 0, 0, 1, 2, 3, 4, 4}, - { 0, 0, 1, 2, 3, 4, 4}, - { 5, 5, 6, 7, 8, 9, 9}, - { 10, 10, 11, 12, 13, 14, 14}, - { 15, 15, 16, 17, 18, 19, 19}, - { 20, 20, 21, 22, 23, 24, 24}, - { 20, 20, 21, 22, 23, 24, 24}}, 
- - {{ 25, 25, 26, 27, 28, 29, 29}, - { 25, 25, 26, 27, 28, 29, 29}, - { 30, 30, 31, 32, 33, 34, 34}, - { 35, 35, 36, 37, 38, 39, 39}, - { 40, 40, 41, 42, 43, 44, 44}, - { 45, 45, 46, 47, 48, 49, 49}, - { 45, 45, 46, 47, 48, 49, 49}}, - - {{ 50, 50, 51, 52, 53, 54, 54}, - { 50, 50, 51, 52, 53, 54, 54}, - { 55, 55, 56, 57, 58, 59, 59}, - { 60, 60, 61, 62, 63, 64, 64}, - { 65, 65, 66, 67, 68, 69, 69}, - { 70, 70, 71, 72, 73, 74, 74}, - { 70, 70, 71, 72, 73, 74, 74}} - }, - { - {{ 75, 75, 76, 77, 78, 79, 79}, - { 75, 75, 76, 77, 78, 79, 79}, - { 80, 80, 81, 82, 83, 84, 84}, - { 85, 85, 86, 87, 88, 89, 89}, - { 90, 90, 91, 92, 93, 94, 94}, - { 95, 95, 96, 97, 98, 99, 99}, - { 95, 95, 96, 97, 98, 99, 99}}, - - {{100, 100, 101, 102, 103, 104, 104}, - {100, 100, 101, 102, 103, 104, 104}, - {105, 105, 106, 107, 108, 109, 109}, - {110, 110, 111, 112, 113, 114, 114}, - {115, 115, 116, 117, 118, 119, 119}, - {120, 120, 121, 122, 123, 124, 124}, - {120, 120, 121, 122, 123, 124, 124}}, - - {{125, 125, 126, 127, 128, 129, 129}, - {125, 125, 126, 127, 128, 129, 129}, - {130, 130, 131, 132, 133, 134, 134}, - {135, 135, 136, 137, 138, 139, 139}, - {140, 140, 141, 142, 143, 144, 144}, - {145, 145, 146, 147, 148, 149, 149}, - {145, 145, 146, 147, 148, 149, 149}} - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); - myPad->getOperator()->setDataType(DataType::Int32); - myPad->getOperator()->setBackend("cpu"); - myPad->forward(); - // myPad->getOperator()->getOutput(0)->print(); - REQUIRE(*(op->getOutput(0)) == *myOutput); - } + op->associateInput(0, input); - SECTION("Pad Reflect") { - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 
26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - { - { 6, 5, 6, 7, 8, 9, 5}, - { 1, 0, 1, 2, 3, 4, 0}, - { 6, 5, 6, 7, 8, 9, 5}, - { 11, 10, 11, 12, 13, 14, 10}, - { 16, 15, 16, 17, 18, 19, 15}, - { 21, 20, 21, 22, 23, 24, 20}, - { 1, 0, 1, 2, 3, 4, 0} - }, - { - { 31, 30, 31, 32, 33, 34, 30}, - { 26, 25, 26, 27, 28, 29, 25}, - { 31, 30, 31, 32, 33, 34, 30}, - { 36, 35, 36, 37, 38, 39, 35}, - { 41, 40, 41, 42, 43, 44, 40}, - { 46, 45, 46, 47, 48, 49, 45}, - { 26, 25, 26, 27, 28, 29, 25} - }, - { - { 56, 55, 56, 57, 58, 59, 55}, - { 51, 50, 51, 52, 53, 54, 50}, - { 56, 55, 56, 57, 58, 59, 55}, - { 61, 60, 61, 62, 63, 64, 60}, - { 66, 65, 66, 67, 68, 69, 65}, - { 71, 70, 71, 72, 73, 74, 70}, - { 51, 50, 51, 52, 53, 54, 50} - } - }, - { - { - { 81, 80, 81, 82, 83, 84, 80}, - { 76, 75, 76, 77, 78, 79, 75}, - { 81, 80, 81, 82, 83, 84, 80}, - { 86, 85, 86, 87, 88, 89, 85}, - { 91, 90, 91, 92, 93, 94, 90}, - { 96, 95, 96, 97, 98, 99, 95}, - { 76, 75, 76, 77, 78, 79, 75} - }, - { - { 106, 105, 106, 107, 108, 109, 105}, - { 101, 100, 101, 102, 103, 104, 100}, - { 106, 105, 106, 107, 108, 109, 105}, - { 111, 110, 111, 112, 113, 114, 110}, - { 116, 115, 116, 117, 118, 119, 115}, - { 121, 120, 121, 122, 123, 124, 120}, - { 101, 100, 101, 102, 
103, 104, 100} - }, - { - { 131, 130, 131, 132, 133, 134, 130}, - { 126, 125, 126, 127, 128, 129, 125}, - { 131, 130, 131, 132, 133, 134, 130}, - { 136, 135, 136, 137, 138, 139, 135}, - { 141, 140, 141, 142, 143, 144, 140}, - { 146, 145, 146, 147, 148, 149, 145}, - { 126, 125, 126, 127, 128, 129, 125} - } - } - } - }); - - myPad->getOperator()->associateInput(0,myInput); - myPad->getOperator()->setDataType(DataType::Int32); - myPad->getOperator()->setBackend("cpu"); - myPad->forward(); - op->getOutput(0)->print(); - REQUIRE(*(op->getOutput(0)) == *myOutput); - } + REQUIRE_NOTHROW(op->forwardDims(true)); - SECTION("Pad Wrap") { - std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap); - auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}} - }, - { - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}}, - - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}} - } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW - { - { - {{ 24, 20, 21, 22, 23, 24, 20}, - { 4, 0, 1, 2, 3, 4, 0}, - { 9, 5, 6, 7, 8, 9, 5}, - { 14, 10, 11, 12, 13, 14, 10}, - { 19, 15, 16, 17, 18, 19, 15}, - { 24, 20, 21, 22, 23, 24, 
20}, - { 4, 0, 1, 2, 3, 4, 0}}, - - {{ 49, 45, 46, 47, 48, 49, 45}, - { 29, 25, 26, 27, 28, 29, 25}, - { 34, 30, 31, 32, 33, 34, 30}, - { 39, 35, 36, 37, 38, 39, 35}, - { 44, 40, 41, 42, 43, 44, 40}, - { 49, 45, 46, 47, 48, 49, 45}, - { 29, 25, 26, 27, 28, 29, 25}}, - - {{ 74, 70, 71, 72, 73, 74, 70}, - { 54, 50, 51, 52, 53, 54, 50}, - { 59, 55, 56, 57, 58, 59, 55}, - { 64, 60, 61, 62, 63, 64, 60}, - { 69, 65, 66, 67, 68, 69, 65}, - { 74, 70, 71, 72, 73, 74, 70}, - { 54, 50, 51, 52, 53, 54, 50}} - }, - { - {{ 99, 95, 96, 97, 98, 99, 95}, - { 79, 75, 76, 77, 78, 79, 75}, - { 84, 80, 81, 82, 83, 84, 80}, - { 89, 85, 86, 87, 88, 89, 85}, - { 94, 90, 91, 92, 93, 94, 90}, - { 99, 95, 96, 97, 98, 99, 95}, - { 79, 75, 76, 77, 78, 79, 75}}, - - {{124, 120, 121, 122, 123, 124, 120}, - {104, 100, 101, 102, 103, 104, 100}, - {109, 105, 106, 107, 108, 109, 105}, - {114, 110, 111, 112, 113, 114, 110}, - {119, 115, 116, 117, 118, 119, 115}, - {124, 120, 121, 122, 123, 124, 120}, - {104, 100, 101, 102, 103, 104, 100}}, - - {{149, 145, 146, 147, 148, 149, 145}, - {129, 125, 126, 127, 128, 129, 125}, - {134, 130, 131, 132, 133, 134, 130}, - {139, 135, 136, 137, 138, 139, 135}, - {144, 140, 141, 142, 143, 144, 140}, - {149, 145, 146, 147, 148, 149, 145}, - {129, 125, 126, 127, 128, 129, 125}} - } + return op; +} + +TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { + SECTION("2D") { + SECTION("Symmetric Pad") { + const int pv = 0; // pad value + + std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, + "mypad", + PadBorderType::Constant, + static_cast<double>(pv)); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 
51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 7, 7>{// NCHW + {{{{pv, pv, pv, pv, pv, pv, pv}, + {pv, 0, 1, 2, 3, 4, pv}, + {pv, 5, 6, 7, 8, 9, pv}, + {pv, 10, 11, 12, 13, 14, pv}, + {pv, 15, 16, 17, 18, 19, pv}, + {pv, 20, 21, 22, 23, 24, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 25, 26, 27, 28, 29, pv}, + {pv, 30, 31, 32, 33, 34, pv}, + {pv, 35, 36, 37, 38, 39, pv}, + {pv, 40, 41, 42, 43, 44, pv}, + {pv, 45, 46, 47, 48, 49, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 50, 51, 52, 53, 54, pv}, + {pv, 55, 56, 57, 58, 59, pv}, + {pv, 60, 61, 62, 63, 64, pv}, + {pv, 65, 66, 67, 68, 69, pv}, + {pv, 70, 71, 72, 73, 74, pv}, + {pv, pv, pv, pv, pv, pv, pv}}}, + {{{pv, pv, pv, pv, pv, pv, pv}, + {pv, 75, 76, 77, 78, 79, pv}, + {pv, 80, 81, 82, 83, 84, pv}, + {pv, 85, 86, 87, 88, 89, pv}, + {pv, 90, 91, 92, 93, 94, pv}, + {pv, 95, 96, 97, 98, 99, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 100, 101, 102, 103, 104, pv}, + {pv, 105, 106, 107, 108, 109, pv}, + {pv, 110, 111, 112, 113, 114, pv}, + {pv, 115, 116, 117, 118, 119, pv}, + {pv, 120, 121, 122, 123, 124, pv}, + {pv, pv, pv, pv, pv, pv, pv}}, + + {{pv, pv, pv, pv, pv, pv, pv}, + {pv, 125, 126, 127, 128, 129, pv}, + {pv, 130, 131, 132, 133, 134, pv}, + {pv, 135, 136, 137, 138, 139, pv}, + {pv, 140, 141, 142, 143, 144, pv}, + {pv, 145, 146, 147, 
148, 149, pv}, + {pv, pv, pv, pv, pv, pv, pv}}}}}); + + myPad->getOperator()->associateInput(0, myInput); + myPad->getOperator()->setDataType(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + + SECTION("Asymmetric Pad") { + const int pv = 0; // pad value + + std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, + "mypad", + PadBorderType::Constant, + static_cast<double>(pv)); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 6, 6>{// NCHW + {{{{pv, pv, pv, pv, pv, pv}, + {0, 1, 2, 3, 4, pv}, + {5, 6, 7, 8, 9, pv}, + {10, 11, 12, 13, 14, pv}, + {15, 16, 17, 18, 19, pv}, + {20, 21, 22, 23, 24, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {25, 26, 27, 28, 29, pv}, + {30, 31, 32, 33, 34, pv}, + {35, 36, 37, 38, 39, pv}, + {40, 41, 42, 43, 44, pv}, + {45, 46, 47, 48, 49, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {50, 51, 52, 53, 54, pv}, + {55, 56, 57, 58, 59, pv}, + {60, 
61, 62, 63, 64, pv}, + {65, 66, 67, 68, 69, pv}, + {70, 71, 72, 73, 74, pv}}}, + {{{pv, pv, pv, pv, pv, pv}, + {75, 76, 77, 78, 79, pv}, + {80, 81, 82, 83, 84, pv}, + {85, 86, 87, 88, 89, pv}, + {90, 91, 92, 93, 94, pv}, + {95, 96, 97, 98, 99, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {100, 101, 102, 103, 104, pv}, + {105, 106, 107, 108, 109, pv}, + {110, 111, 112, 113, 114, pv}, + {115, 116, 117, 118, 119, pv}, + {120, 121, 122, 123, 124, pv}}, + + {{pv, pv, pv, pv, pv, pv}, + {125, 126, 127, 128, 129, pv}, + {130, 131, 132, 133, 134, pv}, + {135, 136, 137, 138, 139, pv}, + {140, 141, 142, 143, 144, pv}, + {145, 146, 147, 148, 149, pv}}}}}); + + myPad->getOperator()->associateInput(0, myInput); + myPad->getOperator()->setDataType(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + + SECTION("Pad Edge") { + std::shared_ptr<Node> myPad = + Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + 
std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{ + // NCHW + {{{{0, 0, 1, 2, 3, 4, 4}, + {0, 0, 1, 2, 3, 4, 4}, + {5, 5, 6, 7, 8, 9, 9}, + {10, 10, 11, 12, 13, 14, 14}, + {15, 15, 16, 17, 18, 19, 19}, + {20, 20, 21, 22, 23, 24, 24}, + {20, 20, 21, 22, 23, 24, 24}}, + + {{25, 25, 26, 27, 28, 29, 29}, + {25, 25, 26, 27, 28, 29, 29}, + {30, 30, 31, 32, 33, 34, 34}, + {35, 35, 36, 37, 38, 39, 39}, + {40, 40, 41, 42, 43, 44, 44}, + {45, 45, 46, 47, 48, 49, 49}, + {45, 45, 46, 47, 48, 49, 49}}, + + {{50, 50, 51, 52, 53, 54, 54}, + {50, 50, 51, 52, 53, 54, 54}, + {55, 55, 56, 57, 58, 59, 59}, + {60, 60, 61, 62, 63, 64, 64}, + {65, 65, 66, 67, 68, 69, 69}, + {70, 70, 71, 72, 73, 74, 74}, + {70, 70, 71, 72, 73, 74, 74}}}, + {{{75, 75, 76, 77, 78, 79, 79}, + {75, 75, 76, 77, 78, 79, 79}, + {80, 80, 81, 82, 83, 84, 84}, + {85, 85, 86, 87, 88, 89, 89}, + {90, 90, 91, 92, 93, 94, 94}, + {95, 95, 96, 97, 98, 99, 99}, + {95, 95, 96, 97, 98, 99, 99}}, + + {{100, 100, 101, 102, 103, 104, 104}, + {100, 100, 101, 102, 103, 104, 104}, + {105, 105, 106, 107, 108, 109, 109}, + {110, 110, 111, 112, 113, 114, 114}, + {115, 115, 116, 117, 118, 119, 119}, + {120, 120, 121, 122, 123, 124, 124}, + {120, 120, 121, 122, 123, 124, 124}}, + + {{125, 125, 126, 127, 128, 129, 129}, + {125, 125, 126, 127, 128, 129, 129}, + {130, 130, 131, 132, 133, 134, 134}, + {135, 135, 136, 137, 138, 139, 139}, + {140, 140, 141, 142, 143, 144, 144}, + {145, 145, 146, 147, 148, 149, 149}, + {145, 145, 146, 147, 148, 149, 149}}}}}); + + myPad->getOperator()->associateInput(0, myInput); + myPad->getOperator()->setDataType(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + + SECTION("Pad Reflect") { + std::shared_ptr<Node> myPad = + Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect); + auto op = + 
std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{ + // NCHW + {{{{6, 5, 6, 7, 8, 9, 5}, + {1, 0, 1, 2, 3, 4, 0}, + {6, 5, 6, 7, 8, 9, 5}, + {11, 10, 11, 12, 13, 14, 10}, + {16, 15, 16, 17, 18, 19, 15}, + {21, 20, 21, 22, 23, 24, 20}, + {1, 0, 1, 2, 3, 4, 0}}, + {{31, 30, 31, 32, 33, 34, 30}, + {26, 25, 26, 27, 28, 29, 25}, + {31, 30, 31, 32, 33, 34, 30}, + {36, 35, 36, 37, 38, 39, 35}, + {41, 40, 41, 42, 43, 44, 40}, + {46, 45, 46, 47, 48, 49, 45}, + {26, 25, 26, 27, 28, 29, 25}}, + {{56, 55, 56, 57, 58, 59, 55}, + {51, 50, 51, 52, 53, 54, 50}, + {56, 55, 56, 57, 58, 59, 55}, + {61, 60, 61, 62, 63, 64, 60}, + {66, 65, 66, 67, 68, 69, 65}, + {71, 70, 71, 72, 73, 74, 70}, + {51, 50, 51, 52, 53, 54, 50}}}, + {{{81, 80, 81, 82, 83, 84, 80}, + {76, 75, 76, 77, 78, 79, 75}, + {81, 80, 81, 82, 83, 84, 80}, + {86, 85, 86, 87, 88, 89, 85}, + {91, 90, 91, 92, 93, 94, 90}, + {96, 95, 96, 97, 98, 99, 95}, + {76, 75, 76, 77, 78, 79, 75}}, + {{106, 105, 106, 107, 108, 109, 105}, + {101, 100, 101, 102, 103, 
104, 100}, + {106, 105, 106, 107, 108, 109, 105}, + {111, 110, 111, 112, 113, 114, 110}, + {116, 115, 116, 117, 118, 119, 115}, + {121, 120, 121, 122, 123, 124, 120}, + {101, 100, 101, 102, 103, 104, 100}}, + {{131, 130, 131, 132, 133, 134, 130}, + {126, 125, 126, 127, 128, 129, 125}, + {131, 130, 131, 132, 133, 134, 130}, + {136, 135, 136, 137, 138, 139, 135}, + {141, 140, 141, 142, 143, 144, 140}, + {146, 145, 146, 147, 148, 149, 145}, + {126, 125, 126, 127, 128, 129, 125}}}}}); + + myPad->getOperator()->associateInput(0, myInput); + myPad->getOperator()->setDataType(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + myPad->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + + SECTION("Pad Wrap") { + std::shared_ptr<Node> myPad = + Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap); + auto op = + std::static_pointer_cast<OperatorTensor>(myPad->getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array4D<int, 2, 3, 5, 5>{// NCHW + {{{{0, 1, 2, 3, 4}, + {5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + {20, 21, 22, 23, 24}}, + + {{25, 26, 27, 28, 29}, + {30, 31, 32, 33, 34}, + {35, 36, 37, 38, 39}, + {40, 41, 42, 43, 44}, + {45, 46, 47, 48, 49}}, + + {{50, 51, 52, 53, 54}, + {55, 56, 57, 58, 59}, + {60, 61, 62, 63, 64}, + {65, 66, 67, 68, 69}, + {70, 71, 72, 73, 74}}}, + {{{75, 76, 77, 78, 79}, + {80, 81, 82, 83, 84}, + {85, 86, 87, 88, 89}, + {90, 91, 92, 93, 94}, + {95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}}}}); + std::shared_ptr<Tensor> myOutput = + std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{ + // NCHW + {{{{24, 20, 21, 22, 23, 24, 20}, + {4, 0, 1, 2, 3, 4, 0}, + {9, 5, 6, 7, 8, 9, 5}, + 
{14, 10, 11, 12, 13, 14, 10}, + {19, 15, 16, 17, 18, 19, 15}, + {24, 20, 21, 22, 23, 24, 20}, + {4, 0, 1, 2, 3, 4, 0}}, + + {{49, 45, 46, 47, 48, 49, 45}, + {29, 25, 26, 27, 28, 29, 25}, + {34, 30, 31, 32, 33, 34, 30}, + {39, 35, 36, 37, 38, 39, 35}, + {44, 40, 41, 42, 43, 44, 40}, + {49, 45, 46, 47, 48, 49, 45}, + {29, 25, 26, 27, 28, 29, 25}}, + + {{74, 70, 71, 72, 73, 74, 70}, + {54, 50, 51, 52, 53, 54, 50}, + {59, 55, 56, 57, 58, 59, 55}, + {64, 60, 61, 62, 63, 64, 60}, + {69, 65, 66, 67, 68, 69, 65}, + {74, 70, 71, 72, 73, 74, 70}, + {54, 50, 51, 52, 53, 54, 50}}}, + {{{99, 95, 96, 97, 98, 99, 95}, + {79, 75, 76, 77, 78, 79, 75}, + {84, 80, 81, 82, 83, 84, 80}, + {89, 85, 86, 87, 88, 89, 85}, + {94, 90, 91, 92, 93, 94, 90}, + {99, 95, 96, 97, 98, 99, 95}, + {79, 75, 76, 77, 78, 79, 75}}, + + {{124, 120, 121, 122, 123, 124, 120}, + {104, 100, 101, 102, 103, 104, 100}, + {109, 105, 106, 107, 108, 109, 105}, + {114, 110, 111, 112, 113, 114, 110}, + {119, 115, 116, 117, 118, 119, 115}, + {124, 120, 121, 122, 123, 124, 120}, + {104, 100, 101, 102, 103, 104, 100}}, + + {{149, 145, 146, 147, 148, 149, 145}, + {129, 125, 126, 127, 128, 129, 125}, + {134, 130, 131, 132, 133, 134, 130}, + {139, 135, 136, 137, 138, 139, 135}, + {144, 140, 141, 142, 143, 144, 140}, + {149, 145, 146, 147, 148, 149, 145}, + {129, 125, 126, 127, 128, 129, 125}}}}}); + + myPad->getOperator()->associateInput(0, myInput); + myPad->getOperator()->setDataType(DataType::Int32); + myPad->getOperator()->setBackend("cpu"); + myPad->forward(); + // myPad->getOperator()->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + } + SECTION("3D") { + constexpr DimSize_t DIM = 3; + SECTION("PadBorderType::Constant") { + constexpr DimSize_t batch = 1; + constexpr DimSize_t channel = 1; + constexpr std::array<DimSize_t, DIM> inDataSize = {2, 2, 2}; + constexpr std::array<DimSize_t, 2 * DIM> beginEndBorder = + {1, 1, 1, 1, 1, 1}; + constexpr std::array<DimSize_t, DIM> outDataSize = { + 
inDataSize[0] + beginEndBorder[0] + beginEndBorder[3], + inDataSize[1] + beginEndBorder[1] + beginEndBorder[4], + inDataSize[2] + beginEndBorder[2] + beginEndBorder[5]}; + constexpr double borderValue = 10; + + auto input = std::make_shared<Tensor>( + Array5D<float, + batch, + channel, + inDataSize[0], + inDataSize[1], + inDataSize[2]>({{{{{{-1, 4}, {-2, -5}}, + + {{-2, 4}, {2, -2}}}}}})); + auto padOp = setupTestPad<DIM>(beginEndBorder, + input, + PadBorderType::Constant, + borderValue); + + REQUIRE_NOTHROW(padOp->forward()); + + Tensor expectedOutput( + Array5D<float, + batch, + channel, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{10, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 10, 10}}, + + {{10, 10, 10, 10}, + {10, -1, 4, 10}, + {10, -2, -5, 10}, + {10, 10, 10, 10}}, + + {{10, 10, 10, 10}, + {10, -2, 4, 10}, + {10, 2, -2, 10}, + {10, 10, 10, 10}}, + + {{10, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 10, 10}}}}}})); + CHECK(approxEq<float>(*padOp->getOutput(0), expectedOutput)); + } + SECTION("PadBorderType::Edge") { + SECTION("small test") { + constexpr DimSize_t batch = 1; + constexpr DimSize_t channel = 1; + constexpr std::array<DimSize_t, DIM> inDataSize = {1, 2, 2}; + constexpr std::array<DimSize_t, 2 * DIM> beginEndBorder = + {1, 1, 1, 1, 1, 1}; + constexpr std::array<DimSize_t, DIM> outDataSize = { + inDataSize[0] + beginEndBorder[0] + beginEndBorder[3], + inDataSize[1] + beginEndBorder[1] + beginEndBorder[4], + inDataSize[2] + beginEndBorder[2] + beginEndBorder[5]}; + auto input = std::make_shared<Tensor>( + Array5D<float, + batch, + channel, + inDataSize[0], + inDataSize[1], + inDataSize[2]>({{{{{{-1, 4}, {-2, -5}}}}}})); + auto padOp = setupTestPad<DIM>(beginEndBorder, + input, + PadBorderType::Edge, + 0); + + REQUIRE_NOTHROW(padOp->forward()); + + Tensor expectedOutput( + Array5D<float, + batch, + channel, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{-1, -1, 4, 4}, + {-1, -1, 4, 
4}, + {-2, -2, -5, -5}, + {-2, -2, -5, -5}}, + + {{-1, -1, 4, 4}, + {-1, -1, 4, 4}, + {-2, -2, -5, -5}, + {-2, -2, -5, -5}}, + + {{-1, -1, 4, 4}, + {-1, -1, 4, 4}, + {-2, -2, -5, -5}, + {-2, -2, -5, -5}}}}}})); + CHECK(approxEq<float>(*padOp->getOutput(0), expectedOutput)); } - }); - - myPad->getOperator()->associateInput(0,myInput); - myPad->getOperator()->setDataType(DataType::Int32); - myPad->getOperator()->setBackend("cpu"); - myPad->forward(); - // myPad->getOperator()->getOutput(0)->print(); - REQUIRE(*(op->getOutput(0)) == *myOutput); + } + SECTION("PadBorderType::Reflect") { + constexpr DimSize_t batch = 1; + constexpr DimSize_t channel = 1; + constexpr std::array<DimSize_t, DIM> inDataSize = {1, 3, 3}; + constexpr std::array<DimSize_t, 2 * DIM> beginEndBorder = + {0, 0, 2, 0, 0, 2}; + constexpr std::array<DimSize_t, DIM> outDataSize = { + inDataSize[0] + beginEndBorder[0] + beginEndBorder[3], + inDataSize[1] + beginEndBorder[1] + beginEndBorder[4], + inDataSize[2] + beginEndBorder[2] + beginEndBorder[5]}; + auto input = std::make_shared<Tensor>(Array5D<float, + batch, + channel, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{-1, 4, -2}, {-5, -2, 4}, {2, -2, 2}}}}}})); + auto padOp = setupTestPad<DIM>(beginEndBorder, + input, + PadBorderType::Reflect, + 0); + + REQUIRE_NOTHROW(padOp->forward()); + + Tensor expectedOutput( + Array5D<float, + batch, + channel, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{-2, 4, -1, 4, -2, 4, -1}, + {4, -2, -5, -2, 4, -2, -5}, + {2, -2, 2, -2, 2, -2, 2}}}}}})); + CHECK(approxEq<float>(*padOp->getOutput(0), expectedOutput)); + } + SECTION("PadBorderType::Wrap") { + constexpr DimSize_t batch = 1; + constexpr DimSize_t channel = 1; + constexpr std::array<DimSize_t, DIM> inDataSize = {1, 3, 3}; + constexpr std::array<DimSize_t, 2 * DIM> beginEndBorder = + {0, 0, 2, 0, 0, 2}; + constexpr std::array<DimSize_t, DIM> outDataSize = { + inDataSize[0] + beginEndBorder[0] + beginEndBorder[3], + inDataSize[1] + 
beginEndBorder[1] + beginEndBorder[4], + inDataSize[2] + beginEndBorder[2] + beginEndBorder[5]}; + auto input = std::make_shared<Tensor>(Array5D<float, + batch, + channel, + inDataSize[0], + inDataSize[1], + inDataSize[2]>( + {{{{{{-1, 4, -2}, {-5, -2, 4}, {2, -2, 2}}}}}})); + auto padOp = setupTestPad<DIM>(beginEndBorder, + input, + PadBorderType::Wrap, + 0); + + REQUIRE_NOTHROW(padOp->forward()); + + Tensor expectedOutput( + Array5D<float, + batch, + channel, + outDataSize[0], + outDataSize[1], + outDataSize[2]>({{{{{{4, -2, -1, 4, -2, -1, 4}, + {-2, 4, -5, -2, 4, -5, -2}, + {-2, 2, 2, -2, 2, 2, -2}}}}}})); + CHECK(approxEq<float>(*padOp->getOutput(0), expectedOutput)); + } } -} \ No newline at end of file +}