diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 11f9c264098d5a238d0d1f8e6bc4fac0cc099549..b81145dec13d39ea701f2ae9dfcbd6b6d17bff39 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -21,6 +21,7 @@ #include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" +#include "aidge/backend/cpu/operator/FoldImpl.hpp" #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/FoldImpl.hpp b/include/aidge/backend/cpu/operator/FoldImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..61701138b0cc1c7f0b7dcea0609ca0d463137e08 --- /dev/null +++ b/include/aidge/backend/cpu/operator/FoldImpl.hpp @@ -0,0 +1,55 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_H_ +#define AIDGE_CPU_OPERATOR_FOLDIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Fold.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +namespace Aidge { +class FoldImpl2DForward_cpu + : public Registrable<FoldImpl2DForward_cpu, + std::tuple<DataType, DataType>, + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::vector<DimSize_t> &, + const void *, + void *)> {}; + +class FoldImpl2D_cpu : public OperatorImpl { +public: + FoldImpl2D_cpu(const Fold_Op<2> &op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<FoldImpl2D_cpu> create(const Fold_Op<2> &op) { + return std::make_unique<FoldImpl2D_cpu>(op); + } + + void forward() override; +}; + +namespace { +// add cpu backend to Fold_Op<2> implementation registry +static Registrar<Fold_Op<2>> registrarFoldImpl2D_cpu("cpu", Aidge::FoldImpl2D_cpu::create); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3dba2319af62fb3dfb2fa75ae9c592ee7ff88e65 --- /dev/null +++ b/include/aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp @@ -0,0 +1,87 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * 
http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_FOLDIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/FoldImpl.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include <cmath>
+#include <array>
+#include <algorithm>
+
+namespace Aidge {
+template <class I, class O>
+void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims,
+                                   const std::array<DimSize_t, 2>& strideDims,
+                                   const std::array<DimSize_t, 2>& dilationDims,
+                                   const std::array<DimSize_t, 2>& kernelDims,
+                                   const std::vector<DimSize_t> &dims,
+                                   const void *input_, void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+    const DimSize_t inHeight = outputDims[0];
+    const DimSize_t inWidth = outputDims[1];
+
+    const DimSize_t kernelExtentHeight = dilationDims[0] *
+                                            (kernelDims[0] - 1) + 1;
+    const DimSize_t outHeight = 1 + static_cast<DimSize_t>(
+                    floor(static_cast<float>(inHeight - kernelExtentHeight) /
+                            static_cast<float>(strideDims[0])));
+    const DimSize_t kernelExtentWidth = dilationDims[1] *
+                                            (kernelDims[1] - 1) + 1;
+    const DimSize_t outWidth = 1 + static_cast<DimSize_t>(
+                    floor(static_cast<float>(inWidth - kernelExtentWidth) /
+                            static_cast<float>(strideDims[1])));
+    const DimSize_t outChannels = dims[dims.size() - 2];
+    const DimSize_t inChannels = outChannels / kernelDims[0] / kernelDims[1];
+
+    std::fill_n(output, dims[0] * inChannels * inHeight * inWidth, O(0));
+
+    for (DimSize_t n = 0; n < dims[0]; ++n) {
+        for (DimSize_t outC = 0; outC < outChannels; ++outC) {
+            const auto inOffsetW = outC % kernelDims[1];
+            const auto inOffsetH = (outC / kernelDims[1]) % kernelDims[0];
+            const auto inC = outC / kernelDims[0] / kernelDims[1];
+ + for (DimSize_t outH = 0; outH < outHeight; ++outH) { + const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0]; + + for (DimSize_t outW = 0; outW < outWidth; ++outW) { + const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1]; + + output[((n * inChannels + inC) * inHeight + inH) * inWidth + inW] += + input[((n * outChannels + outC) * outHeight + outH) * outWidth + outW]; + } + } + } + } +} + +namespace { +static Registrar<FoldImpl2DForward_cpu> registrarFoldImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, + Aidge::FoldImpl2D_cpu_forward_kernel<float, float>); +static Registrar<FoldImpl2DForward_cpu> registrarFoldImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, + Aidge::FoldImpl2D_cpu_forward_kernel<int, int>); +static Registrar<FoldImpl2DForward_cpu> registrarFoldImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, + Aidge::FoldImpl2D_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_FORWARD_KERNEL_H_ */ diff --git a/src/operator/FoldImpl.cpp b/src/operator/FoldImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..532ba946ab8a615a4ba0cb162faca28f1ca6c550 --- /dev/null +++ b/src/operator/FoldImpl.cpp @@ -0,0 +1,41 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu/operator/FoldImpl.hpp" +#include "aidge/backend/cpu/operator/FoldImpl_forward_kernels.hpp" + +void Aidge::FoldImpl2D_cpu::forward() { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = + Registrar<FoldImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + const auto& op_ = static_cast<const Fold_Op<2>&>(mOp); + kernelFunc(op_.outputDims(), + op_.strideDims(), + op_.dilationDims(), + op_.kernelDims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} diff --git a/unit_tests/operator/Test_FoldImpl.cpp b/unit_tests/operator/Test_FoldImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6832f5a42d796d9261495794e0758ce1b6df0346 --- /dev/null +++ b/unit_tests/operator/Test_FoldImpl.cpp @@ -0,0 +1,178 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cstdlib> +#include <memory> + +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/operator/Fold.hpp" +#include "aidge/operator/Unfold.hpp" +#include "aidge/operator/MatMul.hpp" +#include "aidge/operator/Reshape.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") { + std::shared_ptr<Node> myUnfold = Unfold({3,3}, "myunfold"); + std::shared_ptr<Node> myReshape = Reshape({4, 27}, "myreshape"); + std::shared_ptr<Node> myMatMul = MatMul("mymatmul"); + std::shared_ptr<Node> myFold = Fold({3,3}, {1,1}, "myfold"); + myUnfold->addChild(myMatMul, 0, 1); + myReshape->addChild(myMatMul, 0, 0); + myMatMul->addChild(myFold, 0, 0); + + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { + { + { + {{ 0, 1, 2}, + { 3, 4, 5}, + { 6, 7, 8}}, + {{ 9, 10, 11}, + { 12, 13, 14}, + { 15, 16, 17}}, + {{ 18, 19, 20}, + { 21, 22, 23}, + { 24, 25, 26}} + }, + { + {{ 27, 28, 29}, + { 30, 31, 32}, + { 33, 34, 35}}, + {{ 36, 37, 38}, + { 39, 40, 41}, + { 42, 43, 44}}, + {{ 45, 46, 47}, + { 48, 49, 50}, + { 51, 52, 53}} + }, + { + {{ 54, 55, 56}, + { 57, 58, 59}, + { 60, 61, 62}}, + {{ 63, 64, 65}, + { 66, 67, 68}, + { 69, 70, 71}}, + {{ 72, 73, 74}, + { 75, 76, 77}, + { 78, 79, 80}} + }, + { + {{ 81, 82, 83}, + { 84, 85, 86}, + { 87, 88, 89}}, + {{ 90, 91, 92}, + { 93, 94, 95}, + { 96, 97, 98}}, + {{ 99, 100, 101}, + {102, 103, 104}, + {105, 106, 107}} + } + } + }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 
33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { + { + { + {{ 15219, 15570, 15921}, + { 16974, 17325, 17676}, + { 18729, 19080, 19431}}, + {{ 37818, 38898, 39978}, + { 43218, 44298, 45378}, + { 48618, 49698, 50778}}, + {{ 60417, 62226, 64035}, + { 69462, 71271, 73080}, + { 78507, 80316, 82125}}, + {{ 83016, 85554, 88092}, + { 95706, 98244, 100782}, + { 108396, 110934, 113472}} + }, + { + {{ 41544, 41895, 42246}, + { 43299, 43650, 44001}, + { 45054, 45405, 45756}}, + {{ 118818, 119898, 120978}, + { 124218, 125298, 126378}, + { 129618, 130698, 131778}}, + {{ 196092, 197901, 199710}, + { 205137, 206946, 208755}, + { 214182, 215991, 217800}}, + {{ 273366, 275904, 278442}, + { 286056, 288594, 291132}, + { 298746, 301284, 303822}} + } + } + }); + + auto opUnfold = std::static_pointer_cast<OperatorTensor>(myUnfold -> getOperator()); + auto opReshape = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator()); + auto opMatMul = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); + auto opFold = std::static_pointer_cast<OperatorTensor>(myFold -> getOperator()); + opUnfold->associateInput(0,myInput); + opReshape->associateInput(0,myWeights); + + auto g = getConnectedGraphView(myMatMul); + g->setDataType(DataType::Int32); + g->setBackend("cpu"); + + 
g->forwardDims(); + g->save("unfold_matmul_fold"); + + SequentialScheduler scheduler(g); + scheduler.forward(); + //opFold->getOutput(0)->print(); + REQUIRE(*(opFold->getOutput(0)) == *myOutput); +} \ No newline at end of file diff --git a/unit_tests/recipies/Test_ConvToMatMul.cpp b/unit_tests/recipies/Test_ConvToMatMul.cpp new file mode 100644 index 0000000000000000000000000000000000000000..05c5eef83394ba8c965dfabae2bcd8c2b4502c79 --- /dev/null +++ b/unit_tests/recipies/Test_ConvToMatMul.cpp @@ -0,0 +1,76 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/recipes/Recipes.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/filler/Filler.hpp" +#include "aidge/graph/OpArgs.hpp" +#include <cstddef> + +using namespace Aidge; + +TEST_CASE("[ConvToMatMul] conv") { + auto conv1 = Conv(3, 4, {3, 3}, "conv1"); + auto conv2 = Conv(4, 7, {3, 3}, "conv2", {1, 1}, {1, 1}, true); + auto conv3 = Conv(7, 10, {1, 1}, "conv3", {2, 2}); + + auto g1 = Sequential({ + Producer({2, 3, 13, 24}, "dataProvider"), + conv1, + conv2, + conv3 + }); + + g1->setBackend("cpu"); + g1->forwardDims(); + + // Random initialization of input and weights + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(0), -10.0, 10.0); + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(1), -10.0, 10.0); + 
uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(2), -10.0, 10.0);
+    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv2->getOperator())->getInput(1), -10.0, 10.0);
+    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(1), -10.0, 10.0);
+    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(2), -10.0, 10.0);
+
+    auto s1 = SequentialScheduler(g1);
+    s1.forward();
+
+    g1->save("convToMatMul_before");
+
+    auto g2 = g1->clone();
+    g2->forwardDims();
+    REQUIRE(convToMatMul(g2) == 3);
+
+    g2->setBackend("cpu");
+
+    auto s2 = SequentialScheduler(g2);
+    s2.forward();
+
+    g2->save("convToMatMul_after");
+
+    auto g1OutOp = std::static_pointer_cast<OperatorTensor>((*g1->outputNodes().cbegin())->getOperator());
+    auto g2OutOp = std::static_pointer_cast<OperatorTensor>((*g2->outputNodes().cbegin())->getOperator());
+    REQUIRE(*(g1OutOp->getOutput(0)) == *(g2OutOp->getOutput(0)));
+
+    // Simplify the graph: freeze parameters to allow reshaping of the Producers
+    for (auto node : g2->getNodes()) {
+        if (node->type() == Producer_Op::Type && node->name() != "dataProvider") {
+            std::static_pointer_cast<Producer_Op>(node->getOperator())->constant() = true;
+        }
+    }
+
+    constantFolding(g2);
+    g2->save("convToMatMul_after_folding");
+}