diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp index c39cf9cccb41905653c508b985cf27253e20f686..28ed8969aa415ab4151d038869594376480eba43 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp @@ -150,42 +150,93 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri // weight (outCh, ch, kernelX, kernelY) // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { - for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize; - B biasVal = (biases != nullptr) ? biases[ch] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; - const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); - const std::size_t sxMin = 0; - const std::size_t sxMax = dilated_kernel_x; - for (std::size_t oy = 0; oy < oySize; ++oy) { - // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? 
kernelDims[1] : inputDims[3] + dify); - const std::size_t syMin = 0; - const std::size_t syMax = dilated_kernel_y; - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); - } else { - for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { - for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) { + const std::size_t outChannels_s = oxSize * oySize; + + if (dilated_kernel_x ==3 && dilated_kernel_y == 3) { + for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { + for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { + + B biasVal = (biases != nullptr) ? 
biases[ch] : B(0);
+
+                std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t wIndex = ch * 9;
+
+                if (strideDims[0] == 1 && strideDims[1] == 1) {
+                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
+                        for (std::size_t oy = 0; oy < oySize; ++oy) {
+                            output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
+                        }
+                        iIndex+=inputDims[3];
+                        for (std::size_t oy = 0; oy < oySize; ++oy) {
+                            output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
+                        }
+                        iIndex+=inputDims[3];
+                        for (std::size_t oy = 0; oy < oySize; ++oy) {
+                            output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
+                        }
+                    }
+                } else {
+                    // stride > 1: output row ox reads input rows ox*strideDims[0] + {0,1,2},
+                    // output column oy reads input columns oy*strideDims[1] + {0,1,2}
+                    // (dilation is 1 in this branch); the first pass also writes the bias.
+                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize) {
+                        std::size_t iRow = iIndex + ox*strideDims[0]*inputDims[3];
+                        for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                            output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iRow+iy]+weights[wIndex+1]*input[iRow+iy+1]+weights[wIndex+2]*input[iRow+iy+2];
+                        }
+                        iRow+=inputDims[3];
+                        for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                            output[oIndex + oy] += weights[wIndex+3]*input[iRow+iy]+weights[wIndex+4]*input[iRow+iy+1]+weights[wIndex+5]*input[iRow+iy+2];
+                        }
+                        iRow+=inputDims[3];
+                        for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                            output[oIndex + oy] += weights[wIndex+6]*input[iRow+iy]+weights[wIndex+7]*input[iRow+iy+1]+weights[wIndex+8]*input[iRow+iy+2];
+                        }
+                    }
+                }
+                output += outChannels_s;
+            }
+        }
+    } else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
+        std::size_t index = 0;
+        for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+            for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+
+                B biasVal = (biases != nullptr) ? biases[ch] : B(0);
+
+                const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t wIndex = ch;
+
+                if (strideDims[0] == 1 && strideDims[1] == 1) {
+                    for (; index < iIndex + oxSize*oySize; ++index) {
+                        output[index] = biasVal + weights[wIndex] * input[index];
+                    }
+                } else {
+                    std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
+                    for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize) {
+                        // input row matching output row ox
+                        index = iIndex + ox*strideDims[0]*inputDims[3];
+                        for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                            output[oIndex + oy] = biasVal + weights[wIndex]*input[index+iy];
+                        }
+                    }
+                }
+            }
+        }
+    } else {
+        for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+            for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+
+                B biasVal = (biases != nullptr) ? 
biases[ch] : B(0); + std::fill(output, output+outChannels_s, biasVal); + + const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; + + for (std::size_t ox = 0; ox < oxSize; ++ox) { + for (std::size_t oy = 0; oy < oySize; ++oy) { + + const std::size_t oIndexFull = ox*oySize + oy; + const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); + const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); + + for (std::size_t sx = 0; sx*dilationDims[0] < dilated_kernel_x; ++sx) { + for (std::size_t sy = 0; sy*dilationDims[1] < dilated_kernel_y; ++sy) { output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))]; } @@ -193,10 +244,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri } } } + output += outChannels_s; } } } + // Kernels registration to implementation entry point REGISTRAR(ConvDepthWiseImpl2D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp index e800c252676ec5247a776abf458f808289b278c8..b4abac19143d8222cf632757f1c9d4a532cb3661 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp @@ -141,15 +141,15 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, O *output = static_cast<O *>(output_); // output H size + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / static_cast<float>(strideDims[0]))); - const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; // output W size + const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilationDims[1]*(kernelDims[1] - 1) - 1 + strideDims[1]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) / static_cast<float>(strideDims[1]))); - const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; // TODO: kernel computation @@ -157,57 +157,108 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, // input (batch, inCh, Xin, Yin) // weight (outCh, inCh, kernelX, kernelY) // does not take Dilation attribute into account + const std::size_t outChannels_s = oxSize * oySize; using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { - const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize; - // If bias = nullptr, set B(0) - B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { - const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; - const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); - const std::size_t sxMin = 0; - const std::size_t sxMax = dilated_kernel_x; - for (std::size_t oy = 0; oy < oySize; ++oy) { - // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); - const std::size_t syMin = 0; - const std::size_t syMax = dilated_kernel_y; - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); - } else { - for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { - for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))]; + + if (dilated_kernel_x == 3 && dilated_kernel_y == 3) { + for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { + for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { + // If bias = nullptr, set B(0) + B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0);
+                std::fill(output, output+outChannels_s, biasVal);
+                for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                    std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                    const std::size_t wIndex = (inCh + outCh*inputDims[1]) * 9;
+                    if (strideDims[0] == 1 && strideDims[1] == 1) {
+                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
+                            for (std::size_t oy = 0; oy < oySize; ++oy) {
+                                output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
+                            }
+                            iIndex+=inputDims[3];
+                            for (std::size_t oy = 0; oy < oySize; ++oy) {
+                                output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
+                            }
+                            iIndex+=inputDims[3];
+                            for (std::size_t oy = 0; oy < oySize; ++oy) {
+                                output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
+                            }
+                        }
+                    } else {
+                        // stride > 1: output row ox reads input rows ox*strideDims[0] + {0,1,2},
+                        // output column oy reads input columns oy*strideDims[1] + {0,1,2}
+                        // (dilation is 1 in this branch).
+                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize) {
+                            std::size_t iRow = iIndex + ox*strideDims[0]*inputDims[3];
+                            for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                                output[oIndex + oy] += weights[wIndex+0]*input[iRow+iy]+weights[wIndex+1]*input[iRow+iy+1]+weights[wIndex+2]*input[iRow+iy+2];
+                            }
+                            iRow+=inputDims[3];
+                            for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                                output[oIndex + oy] += weights[wIndex+3]*input[iRow+iy]+weights[wIndex+4]*input[iRow+iy+1]+weights[wIndex+5]*input[iRow+iy+2];
+                            }
+                            iRow+=inputDims[3];
+                            for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                                output[oIndex + oy] += weights[wIndex+6]*input[iRow+iy]+weights[wIndex+7]*input[iRow+iy+1]+weights[wIndex+8]*input[iRow+iy+2];
+                            }
+                        }
+                    }
+                }
+                output += outChannels_s;
+            }
+        }
+    } else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
+        for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+            for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+                // If bias = nullptr, set B(0)
+                B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+                std::fill(output, output+outChannels_s, biasVal);
+                for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                    std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                    const std::size_t wIndex = (inCh + outCh*inputDims[1]);
+                    if (strideDims[0] == 1 && strideDims[1] == 1) {
+                        for (std::size_t oIndex = 0; oIndex < oxSize*oySize; ++oIndex, ++iIndex) {
+                            output[oIndex] += weights[wIndex] * input[iIndex];
+                        }
+                    } else {
+                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=inputDims[3]*strideDims[0]) {
+                            for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
+                                output[oIndex + oy] += weights[wIndex+0]*input[iIndex+iy];
+                            }
+                        }
+                    }
+                }
+                output += outChannels_s;
+            }
+        }
+    } else {
+        for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+            for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+                // If bias = nullptr, set B(0)
+                B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); + std::fill(output, output+outChannels_s, biasVal); + for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { + std::size_t iIndex_channel = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; + + // loop over each ouput line + for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex_channel+=inputDims[3]*strideDims[0]) { + // loop over associated input line + for (std::size_t ky = 0, ix = 0; ky < kernelDims[0]; ++ky, ix += inputDims[3]*dilationDims[0]) { + // loop over the entire line + for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) { + const std::size_t iIndex = iIndex_channel + ix + iy; + // loop over elements assosicated with one output + for (std::size_t kx = 0; kx < kernelDims[0]; ++kx) { + output[oIndex + oy] += weights[wIndex+kernelDims[0]*ky+kx]*input[iIndex+kx*dilationDims[1]]; } } } } } + output += outChannels_s; } } } } + + // Kernels registration to implementation entry point REGISTRAR(ConvImpl2D_cpu, {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, diff --git a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp index e4e46de91bfbc38f41520f1edfc7e99d197e5c83..f1594ef5a21070803a7b86861eac513708ec03a2 100644 --- a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp +++ b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp @@ -11,144 +11,219 @@ #include <catch2/catch_test_macros.hpp> #include <memory> +#include <vector> +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/ConvDepthWise.hpp" - -#include "aidge/backend/cpu.hpp" +#include "aidge/utils/TensorUtils.hpp" using namespace Aidge; TEST_CASE("[cpu/operator] ConvDepthWise(forward)", "[ConvDepthWise][CPU]") { - std::shared_ptr<Node> myCDW = ConvDepthWise(4, {3,3}, "mycdw"); - auto op = std::static_pointer_cast<OperatorTensor>(myCDW -> getOperator()); - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,1,3,3> { - { - {{ - { 0, 1, 2}, - { 3, 4, 5}, - { 6, 7, 8} - - }}, - {{ - { 27, 28, 29}, - { 30, 31, 32}, - { 33, 34, 35} - - }}, - {{ - { 54, 55, 56}, - { 57, 58, 59}, - { 60, 61, 62} - }}, - {{ - { 81, 82, 83}, - { 84, 85, 86}, - { 87, 88, 89} - }} - } - }); - std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { //NCHW - { - { - {{ 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - { 10, 11, 12, 13, 14}, - { 15, 16, 17, 18, 19}, - { 20, 21, 22, 23, 24}}, - - {{ 25, 26, 27, 28, 29}, - { 30, 31, 32, 33, 34}, - { 35, 36, 37, 38, 39}, - { 40, 41, 42, 43, 44}, - { 45, 46, 47, 48, 49}}, - - {{ 50, 51, 52, 53, 54}, - { 55, 56, 57, 58, 59}, - { 60, 61, 62, 63, 64}, - { 65, 66, 67, 68, 69}, - { 70, 71, 72, 73, 74}}, - - {{ 75, 76, 77, 78, 79}, - { 80, 81, 82, 83, 84}, - { 85, 86, 87, 88, 89}, - { 90, 91, 92, 93, 94}, - { 95, 96, 97, 98, 99}} - }, + SECTION("k[3,3]") { + std::shared_ptr<Node> myCDW = ConvDepthWise(4, {3,3}, "mycdw"); + auto op = std::static_pointer_cast<OperatorTensor>(myCDW -> getOperator()); + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,1,3,3> { { - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - - {{125, 
126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}}, - - {{150, 151, 152, 153, 154}, - {155, 156, 157, 158, 159}, - {160, 161, 162, 163, 164}, - {165, 166, 167, 168, 169}, - {170, 171, 172, 173, 174}}, - - {{175, 176, 177, 178, 179}, - {180, 181, 182, 183, 184}, - {185, 186, 187, 188, 189}, - {190, 191, 192, 193, 194}, - {195, 196, 197, 198, 199}} + {{ + { 0, 1, 2}, + { 3, 4, 5}, + { 6, 7, 8} + + }}, + {{ + { 27, 28, 29}, + { 30, 31, 32}, + { 33, 34, 35} + + }}, + {{ + { 54, 55, 56}, + { 57, 58, 59}, + { 60, 61, 62} + }}, + {{ + { 81, 82, 83}, + { 84, 85, 86}, + { 87, 88, 89} + }} } - } - }); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { - { + }); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { //NCHW { - {{ 319, 355, 391}, - { 499, 535, 571}, - { 679, 715, 751}}, - - {{ 8745, 9024, 9303}, - { 10140, 10419, 10698}, - { 11535, 11814, 12093}}, - - {{ 29337, 29859, 30381}, - { 31947, 32469, 32991}, - { 34557, 35079, 35601}}, - - {{ 62061, 62826, 63591}, - { 65886, 66651, 67416}, - { 69711, 70476, 71241}} - }, + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}}, + + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}} + }, + { + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}}, + + {{150, 151, 152, 153, 154}, + {155, 156, 157, 158, 159}, + {160, 161, 162, 163, 164}, + {165, 166, 167, 168, 169}, + {170, 171, 172, 173, 174}}, + + {{175, 176, 177, 178, 179}, + {180, 181, 182, 183, 184}, + {185, 186, 187, 188, 189}, + {190, 191, 192, 193, 194}, + {195, 196, 197, 198, 199}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { { - {{ 3919, 3955, 3991}, - { 4099, 4135, 4171}, - { 4279, 4315, 4351}}, - - {{ 36645, 36924, 37203}, - { 38040, 38319, 38598}, - { 39435, 39714, 39993}}, - - {{ 81537, 82059, 82581}, - { 84147, 84669, 85191}, - { 86757, 87279, 87801}}, - - {{138561, 139326, 140091}, - {142386, 143151, 143916}, - {146211, 146976, 147741}} + { + {{ 319, 355, 391}, + { 499, 535, 571}, + { 679, 715, 751}}, + + {{ 8745, 9024, 9303}, + { 10140, 10419, 10698}, + { 11535, 11814, 12093}}, + + {{ 29337, 29859, 30381}, + { 31947, 32469, 32991}, + { 34557, 35079, 35601}}, + + {{ 62061, 62826, 63591}, + { 65886, 66651, 67416}, + { 69711, 70476, 71241}} + }, + { + {{ 3919, 3955, 3991}, + { 4099, 4135, 4171}, + { 4279, 4315, 4351}}, + + {{ 36645, 36924, 37203}, + { 38040, 38319, 38598}, + { 39435, 39714, 39993}}, + + {{ 81537, 82059, 82581}, + { 84147, 84669, 85191}, + { 86757, 87279, 87801}}, + + {{138561, 139326, 140091}, + {142386, 143151, 143916}, + {146211, 146976, 147741}} + } } - } - }); - op -> associateInput(0, myInput); - op -> associateInput(1, myWeights); - op -> 
associateInput(2, myBias); - op->setDataType(DataType::Int32); - op->setBackend("cpu"); - myCDW -> forward(); - op -> getOutput(0) -> print(); - REQUIRE(*(op -> getOutput(0)) == *myOutput); - - // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl; + }); + op -> associateInput(0, myInput); + op -> associateInput(1, myWeights); + op -> associateInput(2, myBias); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + myCDW -> forward(); + op -> getOutput(0) -> print(); + REQUIRE(*(op -> getOutput(0)) == *myOutput); + } + SECTION("point-wise") { + ConvDepthWise_Op<2> conv_op = ConvDepthWise_Op<2>({1,1}); + std::shared_ptr<Tensor> weights = std::make_shared<Tensor>(std::vector<std::size_t>({3,1,1,1})); + weights -> setBackend("cpu"); + std::shared_ptr<Tensor> biases = std::make_shared<Tensor>(std::vector<std::size_t>({3})); + biases -> setBackend("cpu"); + std::shared_ptr<Tensor> input = std::make_shared<Tensor>(std::vector<std::size_t>({2,3,5,5})); + input -> setBackend("cpu"); + std::shared_ptr<Tensor> expected_output = std::make_shared<Tensor>(std::vector<std::size_t>({2,3,5,5})); + expected_output -> setBackend("cpu"); + + float weighst_array[3] {-0.0045, -0.4223, -0.9452}; + weights->getImpl()->setRawPtr(weighst_array, 3); + + float biases_array[3] {-0.8595, 0.7062, -0.0062}; + biases->getImpl()->setRawPtr(biases_array, 3); + + float input_array[2*3*5*5] { + 0.6581, 0.2509, 0.2660, 0.8270, 0.8040, 0.3147, 0.5028, 0.2591, 0.8585, + 0.7762, 0.9972, 0.0305, 0.1202, 0.2682, 0.9306, 0.7927, 0.1494, 0.0678, + 0.5550, 0.4132, 0.4742, 0.6199, 0.1802, 0.6350, 0.2539, 0.5594, 0.0143, + 0.8656, 0.7105, 0.1420, 0.2464, 0.7883, 0.5715, 0.7642, 0.5492, 0.6628, + 0.4922, 0.7941, 0.8421, 0.7914, 0.0237, 0.8081, 0.0174, 0.6018, 0.7402, + 0.3770, 0.8786, 0.3651, 0.5355, 0.4267, 0.4457, 0.6756, 0.9631, 0.0145, + 0.4470, 0.5202, 0.2675, 0.5815, 0.3487, 0.3457, 0.7179, 0.0518, 0.1520, + 0.0573, 0.9219, 0.3615, 0.0866, 0.5237, 0.4725, 0.2565, 0.8726, 0.6434, + 0.6875, 0.2919, 0.3355, 0.1886, 0.1749, 0.0785, 0.4091, 0.1907, 0.4664, + 0.2738, 0.4784, 0.7807, 0.0687, 0.3091, 0.4557, 0.2277, 0.2424, 0.8691, + 0.1893, 0.2918, 0.5691, 0.1926, 0.2866, 0.0097, 0.5445, 0.5085, 0.1110, + 0.7099, 0.8927, 0.6182, 0.2538, 0.8694, 0.7872, 0.3196, 0.0710, 0.2888, + 0.0403, 0.1670, 0.6840, 0.7323, 0.4861, 0.3390, 0.1096, 0.5070, 0.3872, + 0.7473, 0.6224, 0.6910, 0.7530, 0.0149, 0.0866, 0.9022, 0.5027, 0.3849, + 0.5255, 0.1977, 0.0570, 0.9581, 0.5461, 0.4623, 0.0101, 0.2362, 0.5922, + 0.8398, 0.1497, 0.5160, 0.2862, 0.5931, 0.9728, 0.1353, 0.7790, 0.9137, + 0.9351, 0.4036, 0.7638, 0.3873, 0.0494, 0.7450}; + input->getImpl()->setRawPtr(input_array, 2*3*5*5); + + float expected_output_array[2*3*5*5] { + -0.8624, -0.8606, -0.8607, -0.8632, -0.8631, -0.8609, -0.8617, -0.8606, + -0.8633, -0.8629, -0.8639, -0.8596, -0.8600, -0.8607, -0.8636, -0.8630, + -0.8601, -0.8598, -0.8620, -0.8613, -0.8616, -0.8622, -0.8603, -0.8623, + -0.8606, 0.4700, 0.7002, 0.3407, 0.4062, 0.6463, 0.6022, 0.3733, + 0.4649, 0.3835, 0.4743, 0.4263, 0.4984, 0.3709, 0.3506, 0.3720, + 0.6962, 0.3650, 0.6989, 0.4521, 0.3936, 0.5470, 0.3352, 0.5520, + 0.4801, 0.5260, -0.4274, -0.6447, -0.9165, -0.0199, -0.4287, -0.4979, + -0.2590, -0.5559, -0.3358, -0.3329, -0.6847, -0.0552, -0.1499, -0.0603, + -0.8776, -0.3479, -0.0881, -0.5011, -0.4528, -0.2486, -0.8309, -0.6143, + -0.6561, -0.2821, -0.3233, -0.8603, -0.8603, -0.8598, -0.8613, -0.8603, + -0.8616, -0.8607, -0.8616, -0.8630, -0.8598, -0.8609, -0.8615, -0.8605, + -0.8606, -0.8634, 
-0.8603, -0.8608, -0.8620, -0.8603, -0.8608, -0.8595, + -0.8619, -0.8617, -0.8600, -0.8626, 0.3292, 0.4451, 0.5991, 0.3390, + 0.3738, 0.5712, 0.6762, 0.5843, 0.6892, 0.6357, 0.4174, 0.3969, + 0.5009, 0.5631, 0.6599, 0.4921, 0.5427, 0.3906, 0.4434, 0.4144, + 0.3882, 0.6999, 0.6697, 0.3252, 0.4939, -0.3700, -0.5029, -0.1931, + -0.0601, -0.9118, -0.5224, -0.4432, -0.0157, -0.2294, -0.5660, -0.7999, + -0.1477, -0.4939, -0.2767, -0.5668, -0.9257, -0.1341, -0.7425, -0.8698, + -0.8900, -0.3877, -0.7282, -0.3722, -0.0529, -0.7103}; + expected_output->getImpl()->setRawPtr(expected_output_array, 2*3*5*5); + + conv_op.associateInput(0, input); + conv_op.associateInput(1, weights); + conv_op.associateInput(2, biases); + + conv_op.setBackend("cpu"); + conv_op.setDataType(DataType::Float32); + conv_op.forwardDims(); + + conv_op.forward(); + + conv_op.getOutput(0)->print(); + + REQUIRE(approxEq<float>(*(conv_op.getOutput(0)),*expected_output, 1e-3f, 1e-4f)); + } } \ No newline at end of file diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp index b52085139294021de2fe9d72e173ad74db028ea3..e48d69c89eb0d6d52a834b3f32a41d8621fdd42b 100644 --- a/unit_tests/operator/Test_ConvImpl.cpp +++ b/unit_tests/operator/Test_ConvImpl.cpp @@ -15,6 +15,7 @@ #include "aidge/data/Tensor.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/utils/TensorUtils.hpp" #include "aidge/backend/cpu.hpp" @@ -153,7 +154,7 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { op->setDataType(DataType::Int32); op->setBackend("cpu"); myConv->forward(); - // op->getOutput(0)->print(); + op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == *myOutput); } SECTION("Point-wise") { @@ -251,4 +252,147 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") { REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); } } + SECTION("Strided and dilated Conv") { + std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv", {3,3},{2,2}); + auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator()); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,3,8,8> { + {{{ + {0.0107F, 0.5076F, 0.2293F, 0.0486F, 0.7375F, 0.2637F, 0.9615F, 0.9138F}, + {0.0678F, 0.5604F, 0.1940F, 0.0287F, 0.1029F, 0.2059F, 0.5058F, 0.9885F}, + {0.9904F, 0.2890F, 0.4606F, 0.1055F, 0.9028F, 0.1654F, 0.6499F, 0.4775F}, + {0.9499F, 0.4695F, 0.1713F, 0.0731F, 0.4913F, 0.8921F, 0.1782F, 0.1111F}, + {0.2479F, 0.4669F, 0.1078F, 0.6153F, 0.0299F, 0.6484F, 0.2397F, 0.1814F}, + {0.3779F, 0.9032F, 0.5651F, 0.3896F, 0.8439F, 0.6404F, 0.3813F, 0.0841F}, + {0.5566F, 0.8950F, 0.1226F, 0.8881F, 0.9870F, 0.6256F, 0.6387F, 0.0628F}, + {0.2857F, 0.0579F, 0.6247F, 0.1286F, 0.0951F, 0.1268F, 0.9510F, 0.3789F}}, + + {{0.7648F, 0.5340F, 0.1024F, 0.4098F, 0.9958F, 0.7941F, 0.1190F, 0.7328F}, + {0.4532F, 0.6598F, 0.9146F, 0.1690F, 0.6041F, 0.7230F, 0.5719F, 0.9282F}, + {0.2862F, 0.2329F, 0.7302F, 0.6717F, 0.1983F, 0.1876F, 0.4561F, 0.2126F}, + {0.7849F, 0.0239F, 0.7977F, 0.5935F, 0.9958F, 0.4703F, 0.4612F, 0.1627F}, + {0.6393F, 0.3544F, 0.8643F, 0.5039F, 0.8087F, 0.6521F, 0.5086F, 0.9331F}, + {0.7749F, 0.9798F, 0.6820F, 0.7869F, 0.5144F, 0.2941F, 0.8137F, 0.4561F}, + {0.6505F, 0.3974F, 0.6909F, 0.7019F, 0.2729F, 0.4240F, 0.0162F, 0.1536F}, + {0.3529F, 0.8821F, 0.1812F, 0.3426F, 0.3472F, 0.0300F, 0.8841F, 0.8088F}}, + + {{0.5099F, 0.3323F, 0.1488F, 0.3424F, 0.1494F, 0.6225F, 0.8103F, 0.5995F}, + {0.9198F, 0.5635F, 0.8908F, 0.9378F, 0.6689F, 0.3176F, 0.3755F, 0.3883F}, + {0.0626F, 0.5309F, 0.0307F, 0.3955F, 
0.2794F, 0.1420F, 0.4758F, 0.7558F}, + {0.6154F, 0.5280F, 0.2318F, 0.3832F, 0.4435F, 0.3490F, 0.4043F, 0.5872F}, + {0.3705F, 0.3848F, 0.2182F, 0.8332F, 0.4559F, 0.5310F, 0.4611F, 0.4236F}, + {0.6141F, 0.8103F, 0.2260F, 0.9907F, 0.5615F, 0.4520F, 0.6949F, 0.0175F}, + {0.3969F, 0.5021F, 0.0970F, 0.9937F, 0.9270F, 0.4302F, 0.2868F, 0.3891F}, + {0.8693F, 0.5170F, 0.5348F, 0.2676F, 0.9769F, 0.3356F, 0.9427F, 0.3908F}} + }, + { + {{0.4803F, 0.5223F, 0.6395F, 0.8402F, 0.4442F, 0.6377F, 0.7852F, 0.9063F}, + {0.0361F, 0.0470F, 0.3104F, 0.6921F, 0.0543F, 0.4490F, 0.9541F, 0.7395F}, + {0.3832F, 0.3828F, 0.2236F, 0.2068F, 0.4369F, 0.7443F, 0.6952F, 0.6394F}, + {0.5309F, 0.8483F, 0.1991F, 0.9756F, 0.8969F, 0.7284F, 0.4657F, 0.5486F}, + {0.8839F, 0.3260F, 0.6892F, 0.4074F, 0.9473F, 0.5526F, 0.4147F, 0.4786F}, + {0.9674F, 0.0952F, 0.8379F, 0.2163F, 0.9420F, 0.4046F, 0.1339F, 0.5234F}, + {0.4213F, 0.8392F, 0.3184F, 0.4576F, 0.9349F, 0.8267F, 0.0931F, 0.8009F}, + {0.5570F, 0.5871F, 0.4175F, 0.5465F, 0.6679F, 0.9224F, 0.0049F, 0.9421F}}, + + {{0.3739F, 0.6230F, 0.7613F, 0.1337F, 0.8527F, 0.0557F, 0.6424F, 0.8463F}, + {0.7179F, 0.5638F, 0.2457F, 0.4579F, 0.0487F, 0.8693F, 0.8216F, 0.0415F}, + {0.1724F, 0.5108F, 0.9103F, 0.0850F, 0.0080F, 0.8927F, 0.7706F, 0.3600F}, + {0.7751F, 0.8828F, 0.7872F, 0.4541F, 0.3181F, 0.1855F, 0.2486F, 0.0033F}, + {0.5558F, 0.3500F, 0.6034F, 0.1763F, 0.7418F, 0.5190F, 0.5147F, 0.4090F}, + {0.4476F, 0.1249F, 0.8116F, 0.9091F, 0.1738F, 0.6150F, 0.3285F, 0.3133F}, + {0.5657F, 0.4447F, 0.5049F, 0.3425F, 0.7443F, 0.2718F, 0.2466F, 0.5586F}, + {0.3684F, 0.7616F, 0.5165F, 0.9621F, 0.2864F, 0.7747F, 0.8110F, 0.7045F}}, + + {{0.4570F, 0.4577F, 0.0373F, 0.6084F, 0.4632F, 0.3472F, 0.9917F, 0.2011F}, + {0.7921F, 0.2202F, 0.9525F, 0.7274F, 0.3357F, 0.0076F, 0.5786F, 0.3034F}, + {0.6510F, 0.0798F, 0.2757F, 0.1738F, 0.3046F, 0.2197F, 0.3872F, 0.5650F}, + {0.1532F, 0.3204F, 0.6094F, 0.3287F, 0.8903F, 0.9773F, 0.7950F, 0.2845F}, + {0.2482F, 0.3395F, 0.8795F, 0.4325F, 0.1395F, 0.2457F, 0.2968F, 0.5424F}, + {0.8636F, 0.7426F, 0.2151F, 0.6900F, 0.3938F, 0.0062F, 0.4980F, 0.4098F}, + {0.8026F, 0.0464F, 0.2662F, 0.7835F, 0.8444F, 0.0688F, 0.8796F, 0.7625F}, + {0.2764F, 0.5341F, 0.1773F, 0.6671F, 0.7555F, 0.5235F, 0.7142F, 0.9423F}}}} + }); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,4> {{ 0.1902F, -0.1789F, -0.0314F, -0.0589F}}); + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<float,4,3,3,3> { //NCHW + { + { + {{ 0.0039F, 0.1098F, -0.0834F}, + {-0.0890F, 0.0725F, -0.1178F}, + { 0.1056F, -0.0924F, -0.0574F}}, + {{ 0.0070F, -0.0730F, -0.0674F}, + {-0.0380F, -0.1025F, -0.0085F}, + {-0.1451F, -0.0656F, 0.1137F}}, + {{ 0.1020F, 0.1025F, -0.0678F}, + { 0.0028F, 0.1512F, -0.0871F}, + { 0.1563F, -0.1446F, -0.1636F}} + }, + { + {{ 0.1472F, 0.0025F, -0.0281F}, + { 0.0350F, 0.0296F, -0.1711F}, + {-0.1197F, -0.1198F, -0.1130F}}, + {{-0.1492F, 0.1554F, -0.1044F}, + { 0.1203F, -0.1596F, 0.0589F}, + {-0.0436F, -0.1876F, -0.0816F}}, + {{ 0.1572F, -0.0982F, 0.1293F}, + { 0.1358F, 0.1559F, 0.1322F}, + { 0.0296F, -0.0354F, -0.0632F}} + }, + { + {{-0.0941F, -0.0479F, 0.0908F}, + {-0.1319F, -0.1333F, 0.1223F}, + {-0.1098F, 0.1924F, 0.1075F}}, + {{ 0.1796F, 0.0213F, 0.0626F}, + { 0.0275F, 0.1883F, -0.0818F}, + { 0.0363F, 0.0684F, 0.1094F}}, + {{ 0.1131F, 0.1258F, -0.0558F}, + { 0.1498F, 0.0322F, -0.0186F}, + {-0.1801F, -0.0358F, 0.1727F}} + }, + { + {{-0.1500F, -0.0554F, -0.0994F}, + {-0.0818F, -0.1223F, 0.1365F}, + { 0.1281F, 0.1507F, -0.0890F}}, + {{-0.0444F, -0.1071F, 
-0.1632F}, + { 0.0757F, -0.1235F, 0.0408F}, + { 0.0401F, -0.1914F, 0.1772F}}, + {{-0.0714F, 0.1582F, -0.0065F}, + {-0.0119F, 0.1375F, -0.0727F}, + {-0.1532F, -0.1826F, -0.0417F}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,4,2,2> { + { + { + {{-0.2174F, -0.0778F}, + {-0.2584F, 0.2303F}}, + {{-0.7686F, -0.3879F}, + {-0.1775F, 0.0119F}}, + {{ 0.5180F, 0.5087F}, + { 0.5398F, 0.3476F}}, + {{-0.5258F, -0.3128F}, + {-0.6673F, -0.1827F}} + }, + { + {{-0.1902F, -0.0467F}, + {-0.3327F, -0.1701F}}, + {{-0.5505F, -0.4875F}, + {-0.4119F, -0.5726F}}, + {{ 0.5777F, 0.4428F}, + { 0.6121F, 0.7221F}}, + {{-0.6009F, -0.6335F}, + {-0.5159F, -0.3353F}} + } + } + }); + op->associateInput(0,myInput); + op->associateInput(1,myWeights); + op->associateInput(2,myBias); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + myConv->forward(); + op->getOutput(0)->print(); + REQUIRE(approxEq<float>(*(op->getOutput(0)),*myOutput, 1e-3f, 1e-4f)); + } } \ No newline at end of file
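
The specialised 3x3 branches in this patch all use the same row-pass scheme: one pass over the output line per kernel row, so the innermost loop reads a contiguous input line. The standalone sketch below (not part of the patch; names such as naive3x3 and rowpass3x3 are illustrative and not Aidge APIs) cross-checks that scheme, with explicit strides, against a naive valid-padding reference convolution. It can serve as a template for extra unit tests covering the strided fast paths, which the added Catch2 tests above do not exercise.

// Standalone cross-check sketch, assuming row-major HxW planes and a row-major 3x3 kernel.
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

// Naive valid-padding 3x3 convolution of one channel plane, stride (sx, sy).
static std::vector<float> naive3x3(const std::vector<float>& in, std::size_t H, std::size_t W,
                                   std::size_t sx, std::size_t sy, const std::array<float, 9>& w) {
    const std::size_t oH = (H - 3) / sx + 1, oW = (W - 3) / sy + 1;
    std::vector<float> out(oH * oW, 0.f);
    for (std::size_t ox = 0; ox < oH; ++ox)
        for (std::size_t oy = 0; oy < oW; ++oy)
            for (std::size_t kx = 0; kx < 3; ++kx)
                for (std::size_t ky = 0; ky < 3; ++ky)
                    out[ox * oW + oy] += w[kx * 3 + ky] * in[(ox * sx + kx) * W + (oy * sy + ky)];
    return out;
}

// Row-pass variant: one pass per kernel row, so each inner loop reads one contiguous input line.
static std::vector<float> rowpass3x3(const std::vector<float>& in, std::size_t H, std::size_t W,
                                     std::size_t sx, std::size_t sy, const std::array<float, 9>& w) {
    const std::size_t oH = (H - 3) / sx + 1, oW = (W - 3) / sy + 1;
    std::vector<float> out(oH * oW, 0.f);
    for (std::size_t ox = 0; ox < oH; ++ox) {
        float* o = &out[ox * oW];
        for (std::size_t k = 0; k < 3; ++k) {            // one kernel row per pass
            const float* i = &in[(ox * sx + k) * W];     // input row ox*sx + k
            for (std::size_t oy = 0, iy = 0; oy < oW; ++oy, iy += sy)
                o[oy] += w[3 * k] * i[iy] + w[3 * k + 1] * i[iy + 1] + w[3 * k + 2] * i[iy + 2];
        }
    }
    return out;
}

int main() {
    const std::size_t H = 11, W = 9;
    std::mt19937 gen(42);
    std::uniform_real_distribution<float> dist(-1.f, 1.f);
    std::vector<float> in(H * W);
    for (auto& v : in) v = dist(gen);
    std::array<float, 9> w{};
    for (auto& v : w) v = dist(gen);

    for (std::size_t sx : {1, 2, 3}) {
        for (std::size_t sy : {1, 2, 3}) {
            const auto ref  = naive3x3(in, H, W, sx, sy, w);
            const auto fast = rowpass3x3(in, H, W, sx, sy, w);
            float maxDiff = 0.f;
            for (std::size_t i = 0; i < ref.size(); ++i)
                maxDiff = std::max(maxDiff, std::abs(ref[i] - fast[i]));
            // Expected to be ~1e-6 or smaller (floating-point association differences only).
            std::cout << "stride (" << sx << "," << sy << ") max abs diff: " << maxDiff << "\n";
        }
    }
    return 0;
}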