diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py index 0aeeb04b74a078f77c57500b959d6ef9fa9af4d0..c37fc54437c02b0bb1c6f09a1c73d5cc538fa4c0 100644 --- a/aidge_backend_cpu/unit_tests/test_scheduler.py +++ b/aidge_backend_cpu/unit_tests/test_scheduler.py @@ -17,12 +17,12 @@ class test_scheduler(unittest.TestCase): input_node = aidge_core.Producer(aidge_core.Tensor(values), "Input") relu = aidge_core.ReLU() + input_node.add_child(relu) gv = aidge_core.GraphView() gv.add(relu) gv.add(input_node) - input_node.add_child(relu) gv.set_datatype(aidge_core.dtype.int32) gv.set_backend("cpu") diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index b770cf2b51d1489dc30b6173de25506283208392..3a605de8cbfcda125f3982e5574040a4e544d0a6 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -19,6 +19,7 @@ #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/backend/cpu/operator/BitShiftImpl.hpp" #include "aidge/backend/cpu/operator/ClipImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6da67bb7dd4469b6ca609c5aea1ae70dfca3f939 --- /dev/null +++ b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp @@ -0,0 +1,38 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ +#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/BitShift.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using BitShiftImpl_cpu = OperatorImpl_cpu<BitShift_Op, + void(const BitShift_Op::BitShiftDirection, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const void*, + const void*, + void*)>; + + // Implementation entry point registration to Operator + REGISTRAR(BitShift_Op,"cpu",Aidge::BitShiftImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f815e946ea2e4abaff48a6e5155368d564e88e8c --- /dev/null +++ b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp @@ -0,0 +1,70 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include <cstdint> // std::int32_t, std::int64_t +#include "aidge/operator/BitShift.hpp" + +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/operator/BitShiftImpl.hpp" + + + +namespace Aidge { +template <class I1, class I2, class O> +void BitShiftImpl_cpu_forward_kernel( + const BitShift_Op::BitShiftDirection direction, + const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& input2Dims, + const std::vector<std::size_t>& outputDims, + const void* input1_, + const void* input2_, + void* output_ + ) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + const size_t totalElements = std::accumulate(outputDims.begin(), outputDims.end(), std::size_t(1), std::multiplies<std::size_t>()); + + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + if(direction == BitShift_Op::BitShiftDirection::right) + + { + output[oIndex]= input_1[idx1] >> input_2[idx2]; + } + else + { + output[oIndex] = input_1[idx1] << input_2[idx2]; + } + } +} + +REGISTRAR(BitShiftImpl_cpu, +{DataType::Int32}, +{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,nullptr}); +REGISTRAR(BitShiftImpl_cpu, +{DataType::Int64}, +{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>,nullptr}); + + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_BitShiftIMPL_KERNELS_H_ 
*/ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp index ff9bb148fa68d75e2d4b00804e13f063e3ca2cc0..59a471aee82f7c706be390d80b5db569bd3c6f1e 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp @@ -38,7 +38,7 @@ namespace Aidge { */ template <class I, class W, class B, class O> void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, - const std::array<DimSize_t, 1>& /*dilationDims*/, + const std::array<DimSize_t, 1>& dilationDims, const std::array<DimSize_t, 1>& kernelDims, const std::array<DimSize_t, 3>& inputDims, const void *input_, @@ -53,10 +53,12 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri // output H size + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / static_cast<float>(strideDims[0]))); + // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, ch, Xin, Yin) @@ -71,15 +73,17 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2]; const std::size_t wIndex = ch * kernelDims[0]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? 
kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; const std::size_t oIndexFull = oIndex + ox; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { output[oIndexFull] += weights[wIndex + sx] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))]; } } } @@ -113,7 +117,7 @@ REGISTRAR(ConvDepthWiseImpl1D_cpu, */ template <class I, class W, class B, class O> void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& /*dilationDims*/, + const std::array<DimSize_t, 2>& dilationDims, const std::array<DimSize_t, 2>& kernelDims, const std::array<DimSize_t, 4>& inputDims, const void *input_, @@ -129,12 +133,14 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri // output H size + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / static_cast<float>(strideDims[0]))); // output W size + const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / + 
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) / static_cast<float>(strideDims[1]))); // TODO: kernel computation @@ -151,13 +157,17 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); + // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); + // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? 
kernelDims[1] : inputDims[3] + dify); + const std::size_t syMin = 0; + const std::size_t syMax = dilated_kernel_y; const std::size_t oIndexFull = oIndex + ox*oySize + oy; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); @@ -173,10 +183,10 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { + for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) { output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))]; } } } diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp index cc3bd57cb17f2a0feb6a79af2c291e6f960467d8..e800c252676ec5247a776abf458f808289b278c8 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp @@ -40,7 +40,7 @@ namespace Aidge { */ template <class I, class W, class B, class O> void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, - const std::array<DimSize_t, 1>& /*dilationDims*/, + const std::array<DimSize_t, 1>& dilationDims, const std::array<DimSize_t, 1>& kernelDims, const 
std::array<DimSize_t, 3>& inputDims, DimSize_t outChannels, @@ -57,8 +57,9 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) / static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -76,15 +77,17 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2]; const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? 
kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; const std::size_t oIndexFull = oIndex + ox; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { output[oIndexFull] += weights[wIndex + sx] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))]; } } } @@ -122,7 +125,7 @@ REGISTRAR(ConvImpl1D_cpu, */ template <class I, class W, class B, class O> void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& /*dilationDims*/, + const std::array<DimSize_t, 2>& dilationDims, const std::array<DimSize_t, 2>& kernelDims, const std::array<DimSize_t, 4> &inputDims, DimSize_t outChannels, @@ -139,12 +142,15 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) / static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilationDims[1]*(kernelDims[1] - 1) - 1 + strideDims[1]) / static_cast<float>(strideDims[1]))); + const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; + // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -162,13 +168,17 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& 
strideDims, const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); + // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); + // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? 
kernelDims[1] : inputDims[3] + dify); + const std::size_t syMin = 0; + const std::size_t syMax = dilated_kernel_y; const std::size_t oIndexFull = oIndex + ox*oySize + oy; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); @@ -184,10 +194,10 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { + for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) { output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))]; } } } diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp index daf23177fb57bee4111c92654ad94dfae3e50f08..cfbb8173d1f83162519016a8f2b3c3166977a5b7 100644 --- a/include/aidge/backend/cpu/operator/PowImpl.hpp +++ b/include/aidge/backend/cpu/operator/PowImpl.hpp @@ -24,7 +24,8 @@ namespace Aidge { // Operator implementation entry point for the backend using PowImpl_cpu = OperatorImpl_cpu<Pow_Op, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*), - void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const 
std::vector<std::size_t>&, const void*, const void*, void*)>; + void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>; + // Implementation entry point registration to Operator REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create); diff --git a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp index f484fabf437f656dc8671d4ac78161ef11e84de5..ab9b2ccc7b823842decd044b90a5c6364cedc9c9 100644 --- a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp @@ -31,14 +31,10 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, const I2* input_2 = static_cast<const I2*>(input2_); O* output = static_cast<O*>(output_); - size_t totalElements = 1; - for (size_t dimSize : outputDims) { - totalElements *= dimSize; - } - + std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) { - std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); @@ -47,16 +43,53 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, } } +template <class I1, class I2, class O> +void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims, + const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + const void* gradOutput_, + void* gradientInput0_, + void* gradientInput1_) { + const I1* input0 = static_cast<const I1*>(input0_); + I1* grad0 = static_cast<I1*>(gradientInput0_); + const I2* input1 = 
static_cast<const I2*>(input1_); + I2* grad1 = static_cast<I2*>(gradientInput1_); + const O* gradOut = static_cast<const O*>(gradOutput_); + + // Fill input grads with zeros + std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + std::fill(grad0, grad0 + input0Elements, I1(0)); + std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + std::fill(grad1, grad1 + input1Elements, I2(0)); + + std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + for (size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + // Compute indexes in inputs 0 and 1 to support broadcasting + std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::size_t idx0 = getFlattenedIndex(input0Dims, indexes); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + + // grad0 = grad_output * (input1 * pow(input0, (input1 -1))) + grad0[idx0] += gradOut[oIndex]*input1[idx1]* std::pow(input0[idx0], input1[idx1]-1); + + // grad1 = grad_output * (output * ln(input0)) + grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]); + } +} + // Kernels registration to implementation entry point REGISTRAR(PowImpl_cpu, {DataType::Float32}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, nullptr}); + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>}); REGISTRAR(PowImpl_cpu, {DataType::Float64}, - {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, nullptr}); + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>}); REGISTRAR(PowImpl_cpu, {DataType::Int32}, - 
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, nullptr}); + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */ diff --git a/project_name.txt b/project_name.txt deleted file mode 100644 index f8a086fc063978638db5a0fcfe1dc2e5c9d0c1b7..0000000000000000000000000000000000000000 --- a/project_name.txt +++ /dev/null @@ -1 +0,0 @@ -aidge_backend_cpu \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 24ce15ab7ead32f98c7ac3edcd34bb2010ff4326..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -numpy diff --git a/src/operator/BitShiftImpl.cpp b/src/operator/BitShiftImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e0f79fd29fd140f0b41c64d245b9b240da80028 --- /dev/null +++ b/src/operator/BitShiftImpl.cpp @@ -0,0 +1,57 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> +#include <thread> // std::this_thread::sleep_for +#include <vector> + + +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/BitShiftImpl.hpp" +#include "aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp" + +template<> +void Aidge::BitShiftImpl_cpu::forward() { + + const auto& op_ = dynamic_cast<const BitShift_Op&>(mOp); + + + const auto impl = Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + + const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + + BitShift_Op::BitShiftDirection direction = op_.direction(); + + // Call kernel + impl.forward( + direction, + inputDims0, + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); + +} + +template <> +void Aidge::BitShiftImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BitShift_Op on backend cpu"); +} \ No newline at end of file diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp index fe16bb955973d99e022c61043e8144aeaf6801a1..74a7be71e176ba8e1cb8851050e575d6aa7465df 100644 --- a/src/operator/PowImpl.cpp +++ b/src/operator/PowImpl.cpp @@ -44,21 +44,29 @@ void Aidge::PowImpl_cpu::forward() { template <> void Aidge::PowImpl_cpu::backward() { - // Find the 
correct kernel type const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp); - const std::vector<std::size_t> input0gradDims = getBroadcastedDims(op_.getInput(0)->grad()->dims(), - op_.getOutput(0)->grad()->dims()); - const std::vector<std::size_t> input1gradDims = getBroadcastedDims(op_.getInput(1)->grad()->dims(), - op_.getOutput(0)->grad()->dims()); + + auto in0 = op_.getInput(0); + auto in1 = op_.getInput(1); + auto in0grad = op_.getInput(0)->grad(); + auto in1grad = op_.getInput(1)->grad(); + auto out0grad = op_.getOutput(0)->grad(); + + const std::vector<std::size_t> input0gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims()); + const std::vector<std::size_t> input1gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims()); // Find the correct kernel type const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - impl.backward(op_.getOutput(0)->grad()->dims(), - input0gradDims, - input1gradDims, - getCPUPtr(mOp.getRawOutput(0)), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1))); + impl.backward(input0gradDims, + input1gradDims, + out0grad->dims(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0grad), + getCPUPtr(in0grad), + getCPUPtr(in1grad)); } \ No newline at end of file diff --git a/unit_tests/data/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp index 31fbed4c090f5e4848df12f2bc2ccd36e3aedf9d..5f870acfb44366632474b7290228658d7a4701dd 100644 --- a/unit_tests/data/Test_TensorImpl.cpp +++ b/unit_tests/data/Test_TensorImpl.cpp @@ -154,6 +154,10 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") { Tensor T4(T1->dims()); T4.setDataType(DataType::Float64); REQUIRE_THROWS(*T0 + T4); + + delete[] array0; + delete[] array1; + delete[] result; } } diff --git 
a/unit_tests/operator/Test_BitShift.cpp b/unit_tests/operator/Test_BitShift.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a52990bc7991a325ce151cf6634b0d5a831992c8
--- /dev/null
+++ b/unit_tests/operator/Test_BitShift.cpp
@@ -0,0 +1,228 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <chrono>
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
+#include <cstdlib>     // std::abs
+#include <functional>  // std::multiplies
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <numeric>     // std::accumulate
+#include <random>      // std::random_device, std::mt19937, std::uniform_int_distribution
+#include <vector>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/BitShift.hpp"
+#include "aidge/utils/TensorUtils.hpp"
+
+namespace Aidge {
+
+// Checks the CPU BitShift forward kernel against a reference computed in the test,
+// first with identically-shaped inputs, then with inputs that require broadcasting.
+TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
+    constexpr std::uint16_t NBTRIALS = 15;
+
+    // Random generators for values, shapes and broadcasting decisions
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<int> valueDist(-15, 15);
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
+    std::uniform_int_distribution<int> boolDist(0, 1);
+
+    // Shift direction, drawn once and shared by all the trials below
+    const BitShift_Op::BitShiftDirection direction = boolDist(gen)
+        ? BitShift_Op::BitShiftDirection::right
+        : BitShift_Op::BitShiftDirection::left;
+
+    // Reference shift of one element. The left shift goes through unsigned so that
+    // negative values do not trigger undefined behaviour before C++20.
+    const auto referenceShift = [direction](const int value, const int amount) -> int {
+        if (direction == BitShift_Op::BitShiftDirection::left) {
+            return static_cast<int>(static_cast<unsigned int>(value) << amount);
+        }
+        return value >> amount;
+    };
+
+    // Number of elements of a tensor with the given dimensions
+    const auto countElements = [](const std::vector<std::size_t>& dims) -> std::size_t {
+        return std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    };
+
+    // Create BitShift Operator
+    std::shared_ptr<Node> myBitShift = BitShift(direction);
+    auto op = std::static_pointer_cast<OperatorTensor>(myBitShift->getOperator());
+    op->setDataType(DataType::Int32);
+    op->setBackend("cpu");
+
+    // Create 2 input Tensors
+    std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
+    op->associateInput(0, T0);
+    T0->setDataType(DataType::Int32);
+    T0->setBackend("cpu");
+    std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
+    op->associateInput(1, T1);
+    T1->setDataType(DataType::Int32);
+    T1->setBackend("cpu");
+
+    // Create results Tensor
+    std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
+    Tres->setDataType(DataType::Int32);
+    Tres->setBackend("cpu");
+
+    // Accumulated execution time of the 'forward()' calls
+    std::chrono::duration<double, std::micro> duration{};
+
+    SECTION("BitShiftImpl_cpu::forward()") {
+        SECTION("Test Forward Kernel with same dimensions") {
+            std::size_t number_of_operation = 0;
+
+            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+                // Draw a random shape shared by both inputs (no broadcasting)
+                const std::size_t nbDims = nbDimsDist(gen);
+                std::vector<std::size_t> dims;
+                for (std::size_t i = 0; i < nbDims; ++i) {
+                    dims.push_back(dimSizeDist(gen));
+                }
+                const std::size_t nb_elements = countElements(dims);
+                number_of_operation += nb_elements;
+
+                // Vectors own the buffers, so they are released on every exit path
+                std::vector<int> array0(nb_elements);
+                std::vector<int> array1(nb_elements);
+                std::vector<int> result(nb_elements);
+
+                for (std::size_t i = 0; i < nb_elements; ++i) {
+                    array0[i] = valueDist(gen);
+                    array1[i] = std::abs(valueDist(gen)); // shift amounts must not be negative
+                    result[i] = referenceShift(array0[i], array1[i]);
+                }
+
+                // input0
+                T0->resize(dims);
+                T0->getImpl()->setRawPtr(array0.data(), nb_elements);
+
+                // input1
+                T1->resize(dims);
+                T1->getImpl()->setRawPtr(array1.data(), nb_elements);
+
+                // results
+                Tres->resize(dims);
+                Tres->getImpl()->setRawPtr(result.data(), nb_elements);
+
+                op->forwardDims();
+                const auto start = std::chrono::steady_clock::now();
+                myBitShift->forward();
+                const auto end = std::chrono::steady_clock::now();
+                duration += end - start;
+
+                REQUIRE(approxEq<int>(*(op->getOutput(0)), *Tres));
+            }
+            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
+            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+        }
+        SECTION("Test BitShift kernels with Broadcasting") {
+            std::size_t number_of_operation = 0;
+
+            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+                // Draw a 4-D shape, then randomly replace dimensions of each input
+                // with '1' to get broadcasting
+                constexpr std::size_t nbDims = 4;
+                std::vector<std::size_t> dims;
+                for (std::size_t i = 0; i < nbDims; ++i) {
+                    dims.push_back(dimSizeDist(gen));
+                }
+                std::vector<std::size_t> dims0 = dims;
+                std::vector<std::size_t> dims1 = dims;
+                std::vector<std::size_t> dimsOut = dims;
+                for (std::size_t i = 0; i < nbDims; ++i) {
+                    if (boolDist(gen)) {
+                        dims0[i] = 1;
+                    }
+                    if (boolDist(gen)) {
+                        dims1[i] = 1;
+                    }
+                    dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
+                }
+                const std::size_t nbElts0 = countElements(dims0);
+                const std::size_t nbElts1 = countElements(dims1);
+                const std::size_t nbEltsOut = countElements(dimsOut);
+
+                // create arrays and fill them with random values
+                std::vector<int> array0(nbElts0);
+                std::vector<int> array1(nbElts1);
+                std::vector<int> result(nbEltsOut);
+
+                for (int& value : array0) {
+                    value = valueDist(gen);
+                }
+                for (int& amount : array1) {
+                    amount = std::abs(valueDist(gen)); // shift amounts must not be negative
+                }
+
+                // True result with broadcast: an input dimension of size 1 always reads index 0
+                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
+                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
+                for (std::size_t a = 0; a < dimsOut[0]; ++a) {
+                    for (std::size_t b = 0; b < dimsOut[1]; ++b) {
+                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
+                        for (std::size_t c = 0; c < dimsOut[2]; ++c) {
+                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            for (std::size_t d = 0; d < dimsOut[3]; ++d) {
+                                const std::size_t idx0 = idx0_0
+                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
+                                                    + ((dims0[3] > 1) ? d : 0);
+                                const std::size_t idx1 = idx1_0
+                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
+                                                    + ((dims1[3] > 1) ? d : 0);
+                                result[idx_out + d] = referenceShift(array0[idx0], array1[idx1]);
+                            }
+                        }
+                    }
+                }
+
+                // conversion to Aidge::Tensors
+                // input0
+                T0->resize(dims0);
+                T0->getImpl()->setRawPtr(array0.data(), nbElts0);
+
+                // input1
+                T1->resize(dims1);
+                T1->getImpl()->setRawPtr(array1.data(), nbElts1);
+
+                // results
+                Tres->resize(dimsOut);
+                Tres->getImpl()->setRawPtr(result.data(), nbEltsOut);
+
+                // compute result
+                op->forwardDims();
+                const auto start = std::chrono::steady_clock::now();
+                myBitShift->forward();
+                const auto end = std::chrono::steady_clock::now();
+                duration += end - start;
+
+                // comparison between truth and computed result
+                REQUIRE(approxEq<int>(*(op->getOutput(0)), *Tres));
+
+                number_of_operation += nbEltsOut;
+            }
+            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
+            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+        }
+    } // SECTION("BitShiftImpl_cpu::forward()")
+} // TEST_CASE
+} // namespace Aidge
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
index 8a1e589fa0e9a57d712c77a12501d35f5f995bcc..d6e934b4dc8d84e8a595eb74d1af9d2c68c892d1 100644
--- a/unit_tests/operator/Test_MatMulImpl.cpp
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -101,6 +101,10 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)),
*Tres));
+
+            delete[] bigArray1;
+            delete[] bigArray2;
+            delete[] res;
         }
         std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
@@ -165,6 +169,10 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+
+            delete[] bigArray1;
+            delete[] bigArray2;
+            delete[] res;
         }
         std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
@@ -231,6 +239,10 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             end = std::chrono::system_clock::now();
             duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+
+            delete[] bigArray1;
+            delete[] bigArray2;
+            delete[] res;
         }
         std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp
index 3b85defb37ff76439b658faa84c3c7457a152d2f..cb5d8872c9c7242bb4aa4efca388d53b578417f9 100644
--- a/unit_tests/operator/Test_PowImpl.cpp
+++ b/unit_tests/operator/Test_PowImpl.cpp
@@ -313,5 +313,171 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
             std::cout << "total time: " << duration.count() << "μs" << std::endl;
         }
     }
+
+    // Compares the input gradients written by Pow's backward() with precomputed expected values
+    SECTION("PowImpl_cpu::backward()") {
+        SECTION("3D Tensors") {
+            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {2.0, 3.0},
+                            {4.0, 5.0}
+                        },
+                        {
+                            {6.0, 7.0},
+                            {8.0, 9.0}
+                        }
+                    }
+                }
+            ));
+            const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {1.0, 2.0},
+                            {3.0, 2.0}
+                        },
+                        {
+                            {2.0, 3.0},
+                            {1.0, 0.5}
+                        }
+                    }
+                }
+            ));
+            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                {
+                    {
+                        {
+                            {0.5, 1.0},
+                            {1.5, 2.0}
+                        },
+                        {
+                            {2.5, 3.0},
+                            {3.5, 4.0}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( // gradOut * input1 * input0^(input1 - 1)
+                {
+                    {
+                        {
+                            {0.50000000, 6.00000000},
+                            {72.00000000, 20.00000000}
+                        },
+                        {
+                            {30.00000000, 441.00000000},
+                            {3.50000000, 0.66666669}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( // gradOut * input0^input1 * ln(input0)
+                {
+                    {
+                        {
+                            { 0.693147182, 9.88751030},
+                            {1.33084259e+02, 8.04718933e+01}
+                        },
+                        {
+                            {1.61258362e+02, 2.00234143e+03},
+                            {5.82243652e+01, 2.63666954e+01}
+                        }
+                    }
+                }
+            ));
+            for (const auto& T : {input0, input1, gradOut, expectedGrad0, expectedGrad1}) // by reference: no extra shared_ptr copy
+            {
+                T->setBackend("cpu");
+                T->setDataType(DataType::Float32);
+            }
+            std::shared_ptr<Node> powOp = Pow();
+            auto opr = std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
+            opr->setDataType(DataType::Float32);
+            opr->setBackend("cpu");
+            opr->associateInput(0, input0);
+            opr->associateInput(1, input1);
+            opr->getOutput(0)->setGrad(gradOut);
+            powOp->forward();
+
+            powOp->backward();
+            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
+            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
+        }
+        SECTION("Broadcasting") {
+            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                {
+                    {
+                        {
+                            {1.0, 2.0, 3.0},
+                            {4.0, 5.0, 6.0}
+                        },
+                        {
+                            {1.5, 2.5, 3.5},
+                            {4.5, 5.5, 6.5}
+                        }
+                    }
+                }
+            ));
+            const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>( // exponent, broadcast over the two leading dimensions
+                {
+                    {0.1, 0.2, 0.3}
+                }
+            ));
+
+            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                {
+                    {
+                        {
+                            {1.0, 2.0, 3.0},
+                            {4.0, 5.0, 6.0}
+                        },
+                        {
+                            {6.0, 5.0, 4.0},
+                            {3.0, 2.0, 1.0}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( // gradOut * input1 * input0^(input1 - 1)
+                {
+                    {
+                        {
+                            {0.10000000, 0.22973967, 0.41711676},
+                            {0.11486985, 0.27594593, 0.51353097}
+                        },
+                        {
+                            {0.41655189, 0.48044977, 0.49926791},
+                            {0.07748720, 0.10227509, 0.08092485}
+                        }
+                    }
+                }
+            ));
+            const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>( // gradOut * input0^input1 * ln(input0), summed over the broadcast dimensions
+                {
+                    {14.14779854, 22.99299049, 33.56402588}
+                }
+            ));
+
+            for (const auto& T : {input0, input1, gradOut, expectedGrad0, expectedGrad1}) // by reference: no extra shared_ptr copy
+            {
+                T->setBackend("cpu");
+                T->setDataType(DataType::Float32);
+            }
+            std::shared_ptr<Node> powOp = Pow();
+            auto opr = std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
+            opr->setDataType(DataType::Float32);
+            opr->setBackend("cpu");
+            opr->associateInput(0, input0);
+            opr->associateInput(1, input1);
+            opr->getOutput(0)->setGrad(gradOut);
+            powOp->forward();
+
+            powOp->backward();
+            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
+            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
+        }
+    }
 }
 } // namespace Aidge
diff --git a/version.txt b/version.txt
index 7179039691ce07a214e7a815893fee97a97b1422..0d91a54c7d439e84e3dd17d3594f1b2b6737f430 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.2.3
+0.3.0