diff --git a/README.md b/README.md index 74eb50826bf6f88a0ded363138adba04827390d0..865cb08a17ebf8638cb2ac56773a4f464860b8ae 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,11 @@ So far be sure to have the correct requirements to use this library ## Pip installation -You will need to install first the aidge_core library before installing aidge_cpu. -Also, make sure that the install path was set before installing aidge_core library. -Then run in your python environnement : +You will first need to install the ``aidge_core`` library before installing ``aidge_backend_cpu``. + +If you have set a custom install path for the ``aidge_core`` library, make sure to use the same one here. + +Then run in your Python environment: ``` bash pip install . -v ``` @@ -46,4 +48,4 @@ Important: this command can also be run with `make`. To compile the CPU library with the python binding + the associated unitary tests, run ``` make cpu_with_pybind_tests -``` \ No newline at end of file +``` diff --git a/aidge_backend_cpu/unit_tests/test_recipies.py b/aidge_backend_cpu/unit_tests/test_recipies.py new file mode 100644 index 0000000000000000000000000000000000000000..60949adf245f4f4a7ed316879fb307131f70739a --- /dev/null +++ b/aidge_backend_cpu/unit_tests/test_recipies.py @@ -0,0 +1,77 @@ +""" +Copyright (c) 2023 CEA-List + +This program and the accompanying materials are made available under the +terms of the Eclipse Public License 2.0 which is available at +http://www.eclipse.org/legal/epl-2.0. + +SPDX-License-Identifier: EPL-2.0 +""" + +import unittest +import aidge_core +import aidge_backend_cpu + +from functools import reduce +import numpy as np + +class test_recipies(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_fuse_batchnorm(self): + dims = [1, 1, 10, 10] + size = reduce((lambda x, y: x*y), dims) + + input_data = np.arange(size).reshape(dims).astype(np.float32) + input_tensor = aidge_core.Tensor(input_data) + + input_node = aidge_core.Producer(input_tensor, "X") + conv = aidge_core.Conv2D(1, 1, [3, 3], name="Conv0") + bn = aidge_core.BatchNorm2D(name="BN0") + + graph_view = aidge_core.sequential([conv, bn]) + + # Set non-trivial values for the Conv and BatchNorm parameters + input_node.add_child(graph_view) + input_node.get_operator().set_datatype(aidge_core.DataType.Float32) + input_node.get_operator().set_backend("cpu") + graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_backend("cpu") + + np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32) + np_bias = np.arange(1).reshape([1, 1]).astype(np.float32) + + np_scale = np.array([0.05]).astype(np.float32) + np_shift = np.array([0.05]).astype(np.float32) + np_mean = np.array([0.05]).astype(np.float32) + np_var = np.array([0.05]).astype(np.float32) + conv.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_weights)) + conv.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_bias)) + bn.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_scale)) + bn.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_shift)) + bn.input(3)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_mean)) + bn.input(4)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_var)) + scheduler0 = aidge_core.SequentialScheduler(graph_view) + scheduler0.forward() + + for outNode in graph_view.get_output_nodes(): + output_aidge0 = outNode.get_operator().output(0) + + aidge_core.fuse_batchnorm(graph_view) + scheduler1 = aidge_core.SequentialScheduler(graph_view) + scheduler1.forward() + + for outNode in graph_view.get_output_nodes(): + output_aidge1 = outNode.get_operator().output(0) + + self.assertTrue(aidge_core.approx_eq(output_aidge0, output_aidge1, 0.000001, 0.0001)) + +if __name__ == '__main__': + unittest.main() + + +
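For reference, the identity behind the `approx_eq` check above is the standard Conv+BatchNorm folding: per output channel, `fuse_batchnorm` is expected to rescale the convolution weights by `scale / sqrt(var + eps)` and rebase the bias, so the fused graph reproduces the Conv→BatchNorm output up to floating-point rounding. A minimal sketch of that algebra (illustrative helper, not the actual `aidge_core` implementation):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Fold  y = scale * (conv(x, w, b) - mean) / sqrt(var + eps) + shift
// into an equivalent convolution with weights w' and bias b'.
void foldBatchNormIntoConv(std::vector<float> &weights,      // [outCh][inCh*kH*kW], flattened, updated in place
                           std::vector<float> &bias,         // [outCh], updated in place
                           const std::vector<float> &scale,  // BatchNorm scale (gamma)
                           const std::vector<float> &shift,  // BatchNorm shift (beta)
                           const std::vector<float> &mean,   // BatchNorm running mean
                           const std::vector<float> &var,    // BatchNorm running variance
                           float eps = 1.0e-5f) {
    const std::size_t outCh = bias.size();
    const std::size_t weightsPerCh = weights.size() / outCh;
    for (std::size_t ch = 0; ch < outCh; ++ch) {
        const float factor = scale[ch] / std::sqrt(var[ch] + eps);
        for (std::size_t k = 0; k < weightsPerCh; ++k) {
            weights[ch * weightsPerCh + k] *= factor;          // w' = w * factor
        }
        bias[ch] = (bias[ch] - mean[ch]) * factor + shift[ch]; // b' = (b - mean) * factor + shift
    }
}
```

Because the two graphs are algebraically identical and differ only by rounding, the test only needs the small `0.000001` / `0.0001` tolerances.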
diff --git a/aidge_backend_cpu/unit_tests/test_tensor.py b/aidge_backend_cpu/unit_tests/test_tensor.py index 1d12fc0cbadf71f04226a98e2e65984abc7e3254..438b6acd51791a52c9e308fb1aceaefb2a45fb29 100644 --- a/aidge_backend_cpu/unit_tests/test_tensor.py +++ b/aidge_backend_cpu/unit_tests/test_tensor.py @@ -45,5 +45,17 @@ class test_tensor(unittest.TestCase): self.assertTrue(i_t == i_n) # TODO : May need to change this to a difference for i,j in zip(t.dims(), np_array.shape): self.assertEqual(i,j) + + def test_get_set(self): + dims = [2,2,2] + + np_array = np.arange(8).reshape(dims) + # Numpy -> Tensor + t = aidge_core.Tensor(np_array) + for i in range(8): + self.assertEqual(t[i], i) + t[i] = 5 + self.assertEqual(t[i], 5) + if __name__ == '__main__': unittest.main() diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 336d549a71f625667e7e3d368819400396b893e1..1f45d700f6fc9f1d69682cb2de601979049c0af6 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -21,6 +21,7 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/ProducerImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp index dfcb8afa79c98438ae261a244aee94f4ede6c0b3..012ff5af1c15e73fe76114a23ec62f9ef023bce2 100644 --- a/include/aidge/backend/cpu/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -47,6 +47,10 @@ class TensorImpl_cpu : public TensorImpl { return mData.data(); }; + void* getRaw(std::size_t idx) { + return static_cast<void*>(static_cast<T *>(rawPtr()) + idx); + }; + virtual ~TensorImpl_cpu() = default; void setRawPtr(void *ptr) override final { diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp index 490598599aedf24b26865ce6a1ddb3fe32044b1b..221e36dcfac44e21d1b1a35674ca21403b4b57ab 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp @@ -20,7 +20,7 @@ namespace Aidge { template <class I1, class O> void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); O* output = static_cast<O*>(output_); @@ -32,7 +32,7 @@ void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* inp template <class I1, class I2, class O> void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); const I2* input2 = static_cast<const I2*>(input2_); O* output = static_cast<O*>(output_); @@ -45,7 +45,7 @@ void 
AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* inp template <class I1, class I2, class I3, class O> void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, const void* input3_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); const I2* input2 = static_cast<const I2*>(input2_); const I3* input3 = static_cast<const I3*>(input3_); diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index 8373cb84a550efd8741a2dbc04c1e94ad37fe611..cfbcadfe6b719369618955a14c4cde5733ef6773 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -29,11 +29,11 @@ namespace Aidge { class AvgPoolingImpl2DForward_cpu : public Registrable<AvgPoolingImpl2DForward_cpu, std::tuple<DataType, DataType>, - void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; class AvgPoolingImpl2DBackward_cpu : public Registrable<AvgPoolingImpl2DBackward_cpu, std::tuple<DataType, DataType>, - void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; class AvgPoolingImpl2D_cpu : public OperatorImpl { private: diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp index 776e020f1a20056db345c8e845fd73bb31b4138b..60b4923bdc18674da52be9bd07d9947fb9790f0d 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp @@ -26,51 +26,51 @@ namespace Aidge { * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. * @tparam I Input data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param output_ Output Tensor. 
*/ template <class I, class O> -void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters ¶ms, +void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(attrs)[0] + std::get<2>(attrs)[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(attrs)[1] + std::get<2>(attrs)[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1])/ + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, ch, Xin, Yin) // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(std::get<2>(attrs)[0] - ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(std::get<2>(attrs)[1] - oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? std::get<1>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? 
std::get<1>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; + const std::size_t ix = ox * std::get<0>(attrs)[0]; + const std::size_t iy = oy * std::get<0>(attrs)[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { output[oIndexFull] += static_cast<O>( diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp index d9f25b4a8e38510f82fc5afe9ed4b656197a47d5..30557f6cbba05829b3cc9e17364ae4d933a568cf 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -29,7 +29,7 @@ namespace Aidge { class BatchNormImpl2DForward_cpu : public Registrable<BatchNormImpl2DForward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Parameters &, + void(const BatchNorm_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, @@ -41,7 +41,7 @@ class BatchNormImpl2DForward_cpu class BatchNormImpl2DBackward_cpu : public Registrable<BatchNormImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Parameters &, + void(const BatchNorm_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp index eedb80bde60d65b53bac70cc33ca83eb4f0121e7..486829e782ae2173332a7efa6646bb7bba322252 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param scale_ const scale Tensor. @@ -37,9 +37,9 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class P, class O> -void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, +void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const P *scale = static_cast<const P *>(scale_); const P *shift = static_cast<const P *>(shift_); @@ -52,12 +52,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶m const DimSize_t featureMapSize = dims[2]*dims[3]; - if ((freeze == true) || (std::get<1>(params) == 0.0f)) { + if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) { for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (std::size_t ch = 0; ch < nbChannels; ++ch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params))); + const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs))); for (std::size_t feature = 0; feature<featureMapSize; ++feature) { output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; @@ -81,10 +81,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶m const I inputMean = sum / static_cast<I>(nbDataPerChannel); const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; - batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params); - batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params); + batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs); + batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs); - const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params))); + const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs))); for (std::size_t batch = 0; batch < nbBatch; ++batch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; for (std::size_t feature = 0; feature<featureMapSize; ++feature) { diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index 0d21c676d797b2fc4e95c4aea47674c8fca5eef4..2826b635590c5d19f34c8e4beee20fc8dba2183b 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -29,12 +29,12 @@ namespace Aidge { class ConvDepthWiseImpl2DForward_cpu : public Registrable<ConvDepthWiseImpl2DForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvDepthWiseImpl2DBackward_cpu : public Registrable<ConvDepthWiseImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const 
void *, + void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, + const void *, const void *, void *)> {}; class ConvDepthWiseImpl2D_cpu : public OperatorImpl { diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp index ee2d82e00376c5a2cc5a075565e35eb8885c021e..669bdbc898528b0f96a59dd3c6f8e438ae1291e4 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param weights_ const weight Tensor. @@ -35,9 +35,9 @@ namespace Aidge { * @param output_ Output Tensor. */ template <class I, class W, class B, class O> -void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims, +void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); @@ -46,52 +46,52 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameter // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(attrs)[0] + std::get<4>(attrs)[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(attrs)[1] + std::get<4>(attrs)[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, ch, Xin, Yin) // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) { - const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize; + for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) { + const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize; B biasVal = (biases != nullptr) ? 
biases[ch] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1]; + const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(std::get<4>(attrs)[0] - ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(std::get<4>(attrs)[1] - oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? std::get<3>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1]; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]) - std::get<4>(attrs)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]) - std::get<4>(attrs)[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * 
input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * + output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] * input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index 1f3dffe43b966bc37887f267cc56760a899476f9..b9411fe0f1ac079d9857cc8f2178fc98fadc3a77 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -29,12 +29,12 @@ namespace Aidge { class ConvImpl2DForward_cpu : public Registrable<ConvImpl2DForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvImpl2DBackward_cpu : public Registrable<ConvImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvImpl2D_cpu : public OperatorImpl { diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp index bc2f10099f42cba91be8d089b66dc176fdeb7c10..9d4d6dfdfcc114e47e478089c4d5a42c2bee0f28 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param weights_ const weight Tensor. @@ -35,9 +35,9 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, +void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); @@ -45,34 +45,34 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const s /* // output H size const std::size_t oxSize = - static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0])); + static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0])); // output W size const std::size_t oySize = - static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1])); + static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1])); // TODO: kernel computation // output (Xout, Yout, outCh, batch) // input (Xin, Yin, inCh, batch) // weight (kernelX, kernelY, inCh, outCh) - // does not take Dilation parameter into account + // does not take Dilation attribute into account for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; + const std::size_t ix = ox * std::get<0>(attrs)[0]; + const std::size_t iy = oy * std::get<0>(attrs)[1]; - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox)); + for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { + const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox)); B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); for (std::size_t batch = 0; batch < dims[3]; ++batch) { output[oIndex + batch] = biasVal; } for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { - for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) { - for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) { + for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) { + for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) { const std::size_t wIndex = - outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx)); + outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx)); std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); for (std::size_t batch = 0; batch < dims[3]; ++batch) { output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; @@ -88,53 +88,53 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const s // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(attrs)[0] + std::get<5>(attrs)[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(attrs)[1] + std::get<5>(attrs)[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, inCh, Xin, Yin) // weight (outCh, inCh, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize; + for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { + const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize; B biasVal = (biases != nullptr) ? biases[outCh] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1]; + const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(std::get<5>(attrs)[0] - ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? 
std::get<4>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(std::get<5>(attrs)[1] - oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1]; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]) - std::get<5>(attrs)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]) - std::get<5>(attrs)[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + 
+ weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * + output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] * input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp index c69cc0b08a58877108c78d6f12c29e9089c2f665..1dfa40439dbba9cdd4fe3436fea30f771678c1ff 100644 --- a/include/aidge/backend/cpu/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -26,11 +26,11 @@ namespace Aidge { // compute kernel registry for forward and backward class FCImplForward_cpu : public Registrable<FCImplForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t, + void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t, const void *, const void *, const void *, void *)> {}; class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t, + void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t, const void *, const void *, const void *, void *)> {}; class FCImpl_cpu : public OperatorImpl { diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp index d6acb7dfea3415a8d67384745e16ecdd8bf06324..91e2558a7ef1079cbc9fb11f78fab53ef4246149 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp @@ -19,17 +19,17 @@ namespace Aidge { // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims, +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims, // const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments +// // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); // const B* biases = static_cast<const B*>(biases_); // O* output = static_cast<O*>(output_); -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { // std::size_t oIndex = outIdx * dims[3]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// const B bias = std::get<1>(attrs) ? 
B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] = bias; // } @@ -39,10 +39,10 @@ namespace Aidge { // for (std::size_t iy = 0; iy < dims[1]; ++iy) { // for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { // const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { +// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { // const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) + -// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) + +// outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; // } @@ -53,9 +53,9 @@ namespace Aidge { // } // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims, +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims, // const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments +// // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); // const B* biases = static_cast<const B*>(biases_); @@ -63,9 +63,9 @@ namespace Aidge { // // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { // std::size_t oIndex = outIdx * dims[0]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// const B bias = std::get<1>(attrs) ? 
B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // output[oIndex + batch] = bias; // } @@ -74,8 +74,8 @@ namespace Aidge { // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // const std::size_t oIndex = dims[1] * batch; // for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { -// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { +// std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; // output[oIndex + outCh] += weights[wIndex] * input[i + batch]; // } // } @@ -83,29 +83,29 @@ namespace Aidge { // } template <class I, class W, class B, class O> -void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize, +void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, const void* input_, const void* weights_, const void* biases_, void* output_) { - // FIXME: missing FC parameters as arguments + // FIXME: missing FC attributes as arguments const I* input = static_cast<const I*>(input_); const W* weights = static_cast<const W*>(weights_); const B* biases = static_cast<const B*>(biases_); O* output = static_cast<O*>(output_); - if (std::get<1>(params)) { - std::fill(output, output+(batchSize*std::get<0>(params)), B(0)); + if (std::get<1>(attrs)) { + std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0)); } else { for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params))); + std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs))); } } for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < std::get<0>(params); ++out) { - output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize, + for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { + output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, input + (batch + 1)*oneInputSize, weights + out*oneInputSize, - output[out + batch*std::get<0>(params)]); + output[out + batch*std::get<0>(attrs)]); } } } diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index abe167bea16de01f861beb9701f747d39f265d9d..386ef999fddbda184edee88723d213f53ff62ded 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -24,10 +24,10 @@ namespace Aidge { // compute kernel registry for forward and backward class LeakyReLUImplForward_cpu - : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { }; class LeakyReLUImplBackward_cpu - : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { }; class LeakyReLUImpl_cpu : public OperatorImpl { diff --git 
a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp index ff9a8ac6a8f968f244429b330401d794f16fac01..761b9579c3c3dc187e4b0fac24812fa77f916e65 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp @@ -18,14 +18,14 @@ namespace Aidge { template <class I, class O> -void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params, +void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs, std::size_t inputLenght, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - I negativeSlope = static_cast<I>(std::get<0>(params)); + I negativeSlope = static_cast<I>(std::get<0>(attrs)); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope; diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bf8e31efd253ee8855f3473ef0b4a60c59a04b5f --- /dev/null +++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp @@ -0,0 +1,76 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_H_ +#define AIDGE_CPU_OPERATOR_MATMULIMPL_H_ + +#include <array> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/MatMul.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// class MatMul_Op; + +// compute kernel registry for forward and backward +class MatMulImplForward_cpu + : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>, + void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, + const void *, const void *, void *)> {}; +class MatMulImplBackward_cpu + : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, + void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, + const void *, const void *, void *)> {}; + +class MatMulImpl_cpu : public OperatorImpl { +private: + const MatMul_Op &mOp; + std::array<NbElts_t, 2> mNbConsumedData; + std::array<NbElts_t, 1> mNbProducedData; + +public: + MatMulImpl_cpu(const MatMul_Op &op) + : mOp(op), + mNbConsumedData({0, 0}), + mNbProducedData({0}) + { + // ctor + } + + static std::unique_ptr<MatMulImpl_cpu> create(const MatMul_Op &op) + { + return std::make_unique<MatMulImpl_cpu>(op); + } + +public: + NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, + const std::vector<DimSize_t> & /*inputsSize*/) const override final; + NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; + NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + + void updateConsummerProducer() override final; + + void forward(); + void backward(); +}; + 
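+// Note (inferred from the pattern used by the other CPU operators): the static
+// Registrar<MatMul_Op> below adds MatMulImpl_cpu::create to MatMul_Op's
+// implementation registry under the "cpu" key, so a MatMul node picks up this
+// implementation when its backend is set to "cpu". The anonymous namespace gives
+// the registrar object internal linkage, i.e. one registration per translation unit.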
+namespace { +static Registrar<MatMul_Op> registrarMatMulImpl_cpu("cpu", Aidge::MatMulImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bc52779eff274379a853ea84fb839c9486652433 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp @@ -0,0 +1,59 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <algorithm> +#include <numeric> + +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" + +namespace Aidge { + +template <class I, class W, class O> +void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, + const void* input_, const void* weights_, void* output_) { + // FIXME: missing MatMul attributes as arguments + const I* input = static_cast<const I*>(input_); + const W* weights = static_cast<const W*>(weights_); + O* output = static_cast<O*>(output_); + + + std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0)); + + for (std::size_t batch = 0; batch < batchSize; ++batch) { + for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { + output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, + input + (batch + 1)*oneInputSize, + weights + out*oneInputSize, + output[out + batch*std::get<0>(attrs)]); + } + } +} + + +namespace { +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>); +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>); +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>); +} // namespace + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp index cb11b3016b9f694cc518f20a62ea143a94a58afe..37549349b9f5ffbf443d976135db05b4cec209b7 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp @@ -18,16 +18,17 @@ #include "aidge/utils/Types.h" #include <memory> #include <vector> +#include <array> namespace Aidge { // class Scaling_Op; // compute kernel registry for forward and backward class ScalingImplForward_cpu - : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, 
DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { }; class ScalingImplBackward_cpu - : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { }; class ScalingImpl_cpu : public OperatorImpl { @@ -47,7 +48,7 @@ class ScalingImpl_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp index c5b06290ee04ecf9759f418cd26d83e889fcc84e..8fe13bce3a4c470d77b083603d3b889a46fda71f 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp @@ -18,14 +18,14 @@ namespace Aidge { template <class I, class O> -void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Parameters& params, +void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs, std::size_t inputLenght, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - I scalingFactor = static_cast<I>(std::get<0>(params)); + const I scalingFactor = static_cast<I>(std::get<0>(attrs)); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = input[i] * scalingFactor; diff --git a/setup.py b/setup.py index 0b0f66e9132d66cdb6385d7f8c6c69ae0cc5d0e3..16305afdfdfa5de2e328460d9e96c77eb96a9d98 100644 --- a/setup.py +++ b/setup.py @@ -62,11 +62,11 @@ class CMakeBuild(build_ext): os.chdir(str(build_temp)) - # Impose to use the executable of the python + # Impose to use the executable of the python # used to launch setup.py to setup PythonInterp param_py = "-DPYTHON_EXECUTABLE=" + sys.executable - - install_path = f"{build_temp}/install" if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] + + install_path = os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}']) if not self.dry_run: @@ -83,11 +83,11 @@ class CMakeBuild(build_ext): for file in files: if file.endswith('.so') and (root != str(aidge_package.absolute())): currentFile=os.path.join(root, file) - shutil.copy(currentFile, str(aidge_package.absolute())) + shutil.copy(currentFile, str(aidge_package.absolute())) # Copy version.txt in aidge_package os.chdir(os.path.dirname(__file__)) - shutil.copy("version.txt", str(aidge_package.absolute())) + shutil.copy("version.txt", str(aidge_package.absolute())) if __name__ == '__main__': diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index d3da42185237a59146af17199e34a00dbebd6d96..be7923339308073c26b60ee0349a44037769765a 100644 
diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp
index d3da42185237a59146af17199e34a00dbebd6d96..be7923339308073c26b60ee0349a44037769765a 100644
--- a/src/operator/AddImpl.cpp
+++ b/src/operator/AddImpl.cpp
@@ -99,6 +99,7 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOInd
 Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx; // avoid unused warning

     const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
@@ -167,6 +168,7 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOInd
 Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx; // avoid unused warning

     const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp
index 6c434a5c38853a1dee66db5be95b6b1bfdde8162..b1f82bbb4323a402d698d772966409e1a8f7224b 100644
--- a/src/operator/AvgPoolingImpl.cpp
+++ b/src/operator/AvgPoolingImpl.cpp
@@ -70,7 +70,7 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() {
         Registrar<AvgPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                mOp.getInput(0)->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp
index a0d4d032ded9ede1b2dba307aa967af330167d25..90ee2b7a2361166109568e317a1788137150a8d1 100644
--- a/src/operator/BatchNormImpl.cpp
+++ b/src/operator/BatchNormImpl.cpp
@@ -76,7 +76,7 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
                                                          mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                mOp.getInput(0)->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getInput(1)->getImpl()->rawPtr(),
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 3e920cf68366b82bce8df29c8aea0c838e6a1364..7801f64ef46ced22d95af47b8b0e8cc9888a81da 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -77,7 +77,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
                                                                        mOp.getInput(2)->dataType(),
                                                                        mOp.getOutput(0)->dataType()});
     // Call kernel
-    kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    kernelFunc(mOp.getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
 }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index b4ddf80929923a9c2c5998ac8614ebb0d3afe000..edab4432fd5792f27ea158f265641855532d6d0b 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -75,7 +75,7 @@ void Aidge::ConvImpl2D_cpu::forward() {
                                                               mOp.getInput(2)->dataType(),
                                                               mOp.getOutput(0)->dataType()});
     // Call kernel
-    kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    kernelFunc(mOp.getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
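The `(void) outputIdx;` lines added above replace the GCC-specific `__attribute__((unused))` seen earlier in the Scaling header: when `NDEBUG` strips the `assert`, the parameter would otherwise trigger `-Wunused-parameter`. The cast-to-void idiom is portable and has no runtime effect; a self-contained illustration (the function below is a toy, not an Aidge API):

```cpp
#include <cassert>

// Toy example: outputIdx is only used inside the assert, which vanishes
// under NDEBUG; the (void) cast keeps the compiler quiet in that build.
int requiredMemory(int outputIdx) {
    assert(outputIdx == 0 && "operator has only one output");
    (void) outputIdx; // avoid unused warning
    return 1;
}
```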
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index 086902be0ab1c2027a8c62c143bc27921e5e9e1b..3cf1ccf6e951ea05521ef67c99a3e628e0f620f5 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -98,7 +98,7 @@ void Aidge::FCImpl_cpu::forward()
     // Call kernel
     // if (mOp.getInput(0)->nbDims() == 4) {
     //     kernelFunc(
-    //         mOp.getParams(),
+    //         mOp.getStaticAttributes(),
     //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
     //         mOp.getInput(0)->getImpl()->rawPtr(),
     //         mOp.mInputs[1]->getImpl()->rawPtr(),
@@ -107,7 +107,7 @@ void Aidge::FCImpl_cpu::forward()
     // }
     // else
     kernelFunc(
-        mOp.getParams(),
+        mOp.getStaticAttributes(),
         mOp.getInput(0)->dims()[0],
         mOp.getInput(0)->sizeM1(),
         mOp.getInput(0)->getImpl()->rawPtr(),
diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp
index f6a44d381081c7c7f1dcbbf02d91212168cc07aa..316d3641bb960ed8850a94f40186b77cc8522b58 100644
--- a/src/operator/LeakyReLUImpl.cpp
+++ b/src/operator/LeakyReLUImpl.cpp
@@ -65,7 +65,7 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
         mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..972e1f0fc87ad00afe670d77afc8617137076a08
--- /dev/null
+++ b/src/operator/MatMulImpl.cpp
@@ -0,0 +1,121 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/MatMul.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const
+{
+    assert(mOp.getInput(inputIdx) && "requires valid input");
+
+    // Requires the whole tensors
+    const auto &inputDims
+        = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
+
+    return std::accumulate(
+        inputDims.begin(),
+        inputDims.end(),
+        Aidge::NbElts_t(1),
+        std::multiplies<Aidge::NbElts_t>());
+}
+
+Aidge::NbElts_t
+    Aidge::MatMulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const
+{
+    // no amount of input data needs to be protected while computing
+    // the output for this kernel
+    return 0;
+}
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getRequiredMemory(
+    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
+{
+    // Requires the whole tensors, regardless of available data on inputs
+    assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
+
+    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
+    return std::accumulate(
+        outputDims.begin(),
+        outputDims.end(),
+        static_cast<NbElts_t>(1),
+        std::multiplies<NbElts_t>());
+}
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const
+{
+    assert((inputIdx != gk_IODefaultIndex) && (inputIdx < mNbConsumedData.size()));
+    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
+}
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const
+{
+    assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
+    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
+}
+
+void Aidge::MatMulImpl_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]
+            += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
+                                                                      // amount for a forward pass
+
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
+
+void Aidge::MatMulImpl_cpu::forward()
+{
+    // FIXME: uncomment the following code once memory handling works
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.mInputs[1] && "missing input #1");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
+        {mOp.getInput(0)->dataType(),
+         mOp.mInputs[1]->dataType(),
+         mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    // if (mOp.getInput(0)->nbDims() == 4) {
+    //     kernelFunc(
+    //         mOp.getStaticAttributes(),
+    //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    //         mOp.getInput(0)->getImpl()->rawPtr(),
+    //         mOp.mInputs[1]->getImpl()->rawPtr(),
+    //         mOp.mInputs[2]->getImpl()->rawPtr(),
+    //         mOp.getOutput(0)->getImpl()->rawPtr());
+    // }
+    // else
+    kernelFunc(
+        mOp.getStaticAttributes(),
+        mOp.getInput(0)->dims()[0],
+        mOp.getInput(0)->sizeM1(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.mInputs[1]->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+
+
+}
+
+void Aidge::MatMulImpl_cpu::backward()
+{
+    printf("Not implemented yet.\n");
+}
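The registered kernel itself lives in `MatMulImpl_forward_kernels.hpp`, which this patch does not include. Judging from the call site above (attributes, batch size, flattened input size, then raw pointers), a naive reference implementation would look roughly like the sketch below; here the output count is taken as an explicit parameter, whereas the real kernel presumably reads it from the attributes:

```cpp
#include <cstddef>

// Naive row-major matmul consistent with the call site sketched above:
// input is (batch x inSize), weight is (nbOutputs x inSize),
// output is (batch x nbOutputs). Illustrative only.
template <class I, class W, class O>
void matmul_ref(std::size_t nbOutputs, std::size_t batch, std::size_t inSize,
                const I* input, const W* weight, O* output) {
    for (std::size_t b = 0; b < batch; ++b) {
        for (std::size_t o = 0; o < nbOutputs; ++o) {
            O acc = O(0);
            for (std::size_t i = 0; i < inSize; ++i)
                acc += static_cast<O>(input[b * inSize + i]) *
                       static_cast<O>(weight[o * inSize + i]);
            output[b * nbOutputs + o] = acc;
        }
    }
}
```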
diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp
index 0770a0ccc1434d03fc26b07c425053cd7c09bee6..84cd6ee33a8316a24bae472c74c039dabe0afba3 100644
--- a/src/operator/ScalingImpl.cpp
+++ b/src/operator/ScalingImpl.cpp
@@ -10,16 +10,14 @@
  ********************************************************************************/

 #include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include <numeric>    // std::accumulate
+#include <functional> // std::multiplies

 #include "aidge/operator/Scaling.hpp"

 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 #include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
 #include <vector>

 // FIXME: replace whole Tensor with minimum needed data quantity
@@ -38,7 +36,9 @@ Aidge::NbElts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIn
     return 0;
 }

-Aidge::NbElts_t Aidge::ScalingImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::ScalingImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t> &inputsSize) const {
+    (void) outputIdx;
+    (void) inputsSize;
     const auto& outputDims = mOp.getOutput(0)->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -68,7 +68,7 @@ void Aidge::ScalingImpl_cpu::forward() {
         mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
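Every `getRequiredMemory` in this patch reduces to the same idiom: fold the output dimensions with `std::accumulate` and `std::multiplies` to get the element count, which is exactly why `ScalingImpl.cpp` now includes `<functional>`. A stand-alone version of the idiom:

```cpp
#include <cassert>
#include <cstddef>
#include <functional> // std::multiplies
#include <numeric>    // std::accumulate
#include <vector>

// Number of elements in a tensor with the given dimensions.
std::size_t elementCount(const std::vector<std::size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end(),
                           static_cast<std::size_t>(1),
                           std::multiplies<std::size_t>());
}

int main() {
    assert(elementCount({3, 3, 3, 2}) == 54); // e.g. the Add test tensors below
    return 0;
}
```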
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp
similarity index 78%
rename from unit_tests/Test_TensorImpl.cpp
rename to unit_tests/data/Test_TensorImpl.cpp
index d28505f7b6f2961e581cadee778bdb16364353de..6c75c4dc19ff1b646308858ad262441d43390122 100644
--- a/unit_tests/Test_TensorImpl.cpp
+++ b/unit_tests/data/Test_TensorImpl.cpp
@@ -41,12 +41,12 @@ TEST_CASE("Tensor creation") {
   }

   SECTION("get function") {
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 0})) == 1);
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 1})) == 2);
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 1, 1})) == 4);
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 0})) == 7);
-    x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) = 36;
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) == 36);
+    REQUIRE(x.get<int>({0, 0, 0}) == 1);
+    REQUIRE(x.get<int>({0, 0, 1}) == 2);
+    REQUIRE(x.get<int>({0, 1, 1}) == 4);
+    REQUIRE(x.get<int>({1, 1, 0}) == 7);
+    x.get<int>({1, 1, 1}) = 36;
+    REQUIRE(x.get<int>({1, 1, 1}) == 36);
   }

   SECTION("Pretty printing for debug") { REQUIRE_NOTHROW(x.print()); }
diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp
index e24d7ac6bd97586ebdeddce5ccb75807ddf530f0..18d98d169ddcb74310c5153d7c2c95103c395bb7 100644
--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -19,25 +19,25 @@ using namespace Aidge;

 TEST_CASE("[cpu/operator] Add(forward)") {
-    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-        {
-            {
-                {{20, 47},{21, 48},{22, 49}},
-                {{23, 50},{24, 51},{25, 52}},
-                {{26, 53},{27, 54},{28, 55}}
-            },
-            {
-                {{29, 56},{30, 57},{31, 58}},
-                {{32, 59},{33, 60},{34, 61}},
-                {{35, 62},{36, 63},{37, 64}}
-            },
-            {
-                {{38, 65},{39, 66},{40, 67}},
-                {{41, 68},{42, 69},{43, 70}},
-                {{44, 71},{45, 72},{46, 73}}
-            }
-        }
-    });
+    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+        {                                       //
+            {                                   //
+                {{20, 47},{21, 48},{22, 49}},   //
+                {{23, 50},{24, 51},{25, 52}},   //
+                {{26, 53},{27, 54},{28, 55}}    //
+            },                                  //
+            {                                   //
+                {{29, 56},{30, 57},{31, 58}},   //
+                {{32, 59},{33, 60},{34, 61}},   //
+                {{35, 62},{36, 63},{37, 64}}    //
+            },                                  //
+            {                                   //
+                {{38, 65},{39, 66},{40, 67}},   //
+                {{41, 68},{42, 69},{43, 70}},   //
+                {{44, 71},{45, 72},{46, 73}}    //
+            }                                   //
+        }                                       //
+    });                                         //

     SECTION("One input") {
         std::shared_ptr<Node> myAdd = Add<1>();
@@ -51,7 +51,7 @@ TEST_CASE("[cpu/operator] Add(forward)") {
     }

     SECTION("Two inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
             {
                 {
                     {{40, 94},{42, 96},{44, 98}},
@@ -81,9 +81,9 @@ TEST_CASE("[cpu/operator] Add(forward)") {

         REQUIRE(*std::static_pointer_cast<Tensor>(myAdd->getOperator()->getOutput(0)) == *expectedOutput);
     }
-
+
     SECTION("Three inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
             {
                 {
                     {{ 60, 141},{ 63, 144},{ 66, 147}},
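The shortened `x.get<int>({0, 0, 1})` calls above compile because the coordinate parameter can now be initialized directly from a braced list, and `get` returns a reference, which is what lets the test assign through it (`x.get<int>({1, 1, 1}) = 36`). A minimal sketch of that shape of API, using a hypothetical `Grid3` type rather than the real `Tensor`:

```cpp
#include <array>
#include <cassert>
#include <cstddef>

// Hypothetical 2x2x2 container mimicking the Tensor::get coordinate API.
struct Grid3 {
    int data[2][2][2] = {};
    int& get(std::array<std::size_t, 3> c) { return data[c[0]][c[1]][c[2]]; }
};

int main() {
    Grid3 g;
    g.get({1, 1, 1}) = 36; // the braced list initializes the std::array argument
    assert(g.get({1, 1, 1}) == 36);
    return 0;
}
```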
diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp
index 7096962e196c2ace4abf2b0b14aca8dfa37d3441..d5bd91ff75404a7b928c8919c64e06315b78206f 100644
--- a/unit_tests/operator/Test_LeakyReLUImpl.cpp
+++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp
@@ -153,7 +153,7 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
         REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput);
     }

-    SECTION("Test construction parameter: negative_slop") {
+    SECTION("Test construction attribute: negative_slope") {
         std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
             {0.0f, 1.0f, 2.0f,-3.0f, 4.0f,-5.0f,-6.0f, 7.0f, 8.0f, 9.0f}
         });
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0da01b3287043e07e5b967df8882960cfb814f8f
--- /dev/null
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -0,0 +1,108 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/MatMul.hpp"
+
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul]") {
+    // Test MatMul forward with batch size = 2 and feature size = 75
+    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{
+        {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
+    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{
+        {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}}});
+
+    std::shared_ptr<Node> myMatMul = MatMul(5, "mymatmul");
+    myMatMul->getOperator()->setDatatype(DataType::Int32);
+    myMatMul->getOperator()->setBackend("cpu");
+    myMatMul->getOperator()->associateInput(1, myWeights);
+
+    SECTION("2D input") {
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{
+            {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+              19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
+              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+              57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74},
+             {75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+              90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+              105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+              120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+              135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}});
+        myMatMul->getOperator()->associateInput(0, myInput);
+        myMatMul->getOperator()->computeOutputDims();
+        myMatMul->forward();
+        REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput);
+    }
+    SECTION("4D input") {
+        std::shared_ptr<Tensor> myInput =
+            std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4},
+                                                                 {5, 6, 7, 8, 9},
+                                                                 {10, 11, 12, 13, 14},
+                                                                 {15, 16, 17, 18, 19},
+                                                                 {20, 21, 22, 23, 24}},
+                                                                {{25, 26, 27, 28, 29},
+                                                                 {30, 31, 32, 33, 34},
+                                                                 {35, 36, 37, 38, 39},
+                                                                 {40, 41, 42, 43, 44},
+                                                                 {45, 46, 47, 48, 49}},
+                                                                {{50, 51, 52, 53, 54},
+                                                                 {55, 56, 57, 58, 59},
+                                                                 {60, 61, 62, 63, 64},
+                                                                 {65, 66, 67, 68, 69},
+                                                                 {70, 71, 72, 73, 74}}},
+                                                               {{{75, 76, 77, 78, 79},
+                                                                 {80, 81, 82, 83, 84},
+                                                                 {85, 86, 87, 88, 89},
+                                                                 {90, 91, 92, 93, 94},
+                                                                 {95, 96, 97, 98, 99}},
+                                                                {{100, 101, 102, 103, 104},
+                                                                 {105, 106, 107, 108, 109},
+                                                                 {110, 111, 112, 113, 114},
+                                                                 {115, 116, 117, 118, 119},
+                                                                 {120, 121, 122, 123, 124}},
+                                                                {{125, 126, 127, 128, 129},
+                                                                 {130, 131, 132, 133, 134},
+                                                                 {135, 136, 137, 138, 139},
+                                                                 {140, 141, 142, 143, 144},
+                                                                 {145, 146, 147, 148, 149}}}}});
+        myMatMul->getOperator()->associateInput(0, myInput);
+        myMatMul->getOperator()->computeOutputDims();
+        myMatMul->forward();
+        REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput);
+    }
+
+    // std::cout << static_cast<Tensor>((*myMatMul->getOperator())["weight"])[0][0][0][0] << std::endl;
+}
\ No newline at end of file
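A quick independent check of the constants in this new test: each weight row repeats the sequence 1..15 five times across its 75 entries, and the two input rows are 0..74 and 75..149, so the expected dot products can be recomputed by hand. The snippet below is a standalone sanity check, not part of the test suite:

```cpp
#include <cstdio>

int main() {
    int acc0 = 0, acc1 = 0;
    for (int i = 0; i < 75; ++i) {
        const int w = (i % 15) + 1; // weight rows repeat 1..15
        acc0 += i * w;              // first input row: 0..74
        acc1 += (i + 75) * w;       // second input row: 75..149
    }
    std::printf("%d %d\n", acc0, acc1); // prints: 23600 68600
    return 0;
}
```

The 4D case produces the same values because the 2x3x5x5 input flattens to the same two rows of 75 elements each.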
diff --git a/unit_tests/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
similarity index 100%
rename from unit_tests/Test_Scheduler.cpp
rename to unit_tests/scheduler/Test_Scheduler.cpp