diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 5c1f9b111f41a435aa477d0647fa66fb29a058fb..ecf111a7d58f3af8394e079fdb2b29b4c4e4eba3 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -58,6 +58,7 @@ #include "aidge/backend/cpu/operator/SliceImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl.hpp" +#include "aidge/backend/cpu/operator/SumImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/backend/cpu/operator/WeightInterleavedImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp index e82352d9cba60440efef87faf97dfd4ed66565b6..4daa522ff508b1e55874b0d3e6b2688534b1f8be 100644 --- a/include/aidge/backend/cpu/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -27,6 +27,8 @@ using FCImpl_cpu = OperatorImpl_cpu<FC_Op, void(const DimSize_t, const DimSize_t, const DimSize_t, + const float, + const float, const void *, const void *, const void *, @@ -34,6 +36,8 @@ using FCImpl_cpu = OperatorImpl_cpu<FC_Op, void(const DimSize_t, const DimSize_t, const DimSize_t, + const float, + const float, const void *, const void *, const void *, diff --git a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp index c57f86e6ac6e74acebb48f471991e7181920f7c3..873830d39ac9c055d88272bc23db9f7c4f549551 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp @@ -18,107 +18,63 @@ #include "aidge/utils/Registrar.hpp" namespace Aidge { -// template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC attributes as arguments -// const I* input = 
static_cast<const I*>(input_); -// const W* weights = static_cast<const W*>(weights_); -// const B* biases = static_cast<const B*>(biases_); -// O* output = static_cast<O*>(output_); - -// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) { -// std::size_t oIndex = outIdx * dims[3]; -// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx]; -// for (std::size_t batch = 0; batch < dims[3]; ++batch) { -// output[oIndex + batch] = bias; -// } -// } - -// for (std::size_t ix = 0; ix < dims[0]; ++ix) { -// for (std::size_t iy = 0; iy < dims[1]; ++iy) { -// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { -// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) { -// const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize + -// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3]; -// for (std::size_t batch = 0; batch < dims[3]; ++batch) { -// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; -// } -// } -// } -// } -// } -// } - -// template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC attributes as arguments -// const I* input = static_cast<const I*>(input_); -// const W* weights = static_cast<const W*>(weights_); -// const B* biases = static_cast<const B*>(biases_); -// O* output = static_cast<O*>(output_); - -// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] - -// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) { -// std::size_t oIndex = outIdx * dims[0]; -// const B bias = std::get<0>(attrs) ? 
B(0) : biases[outIdx]; -// for (std::size_t batch = 0; batch < dims[0]; ++batch) { -// output[oIndex + batch] = bias; -// } -// } - -// for (std::size_t batch = 0; batch < dims[0]; ++batch) { -// const std::size_t oIndex = dims[1] * batch; -// for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) { -// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3]; -// output[oIndex + outCh] += weights[wIndex] * input[i + batch]; -// } -// } -// } -// } - template <class I, class W, class B, class O> void FCImpl_cpu_forward_kernel(const DimSize_t batchSize, const DimSize_t inputFeatureSize, const DimSize_t outputFeatureSize, + const float alpha_, + const float beta_, const void* input_, const void* weights_, const void* biases_, void* output_) { - // FIXME: missing FC attributes as arguments const I* input = static_cast<const I*>(input_); const W* weights = static_cast<const W*>(weights_); const B* biases = static_cast<const B*>(biases_); O* output = static_cast<O*>(output_); + const O alpha = static_cast<O>(alpha_); + const O beta = static_cast<O>(beta_); + if (biases == nullptr) { - std::fill(output, output+(batchSize*outputFeatureSize), B(0)); - } - else { + std::fill(output, output + (batchSize * outputFeatureSize), O(0)); + } else { + /* TODO: If we want to support batched Biases: + for (std::size_t batch = 0; batch < batchSize; ++batch) { + std::transform( + biases, biases + outputFeatureSize, output + batch * outputFeatureSize, + [beta](const B& bias) { return beta * static_cast<O>(bias); } + ); + + */ for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize)); + std::transform( + biases, biases + outputFeatureSize, output + batch * outputFeatureSize, + [beta](const B& bias) { return beta * static_cast<O>(bias); } + ); } } for (std::size_t batch = 0; batch < batchSize; ++batch) { for 
(std::size_t out = 0; out < outputFeatureSize; ++out) { - output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize, - input + (batch + 1)*inputFeatureSize, - weights + out*inputFeatureSize, - output[out + batch*outputFeatureSize]); + O sum = O(0); + for (std::size_t i = 0; i < inputFeatureSize; ++i) { + std::size_t inputIdx = batch * inputFeatureSize + i; + std::size_t weightIdx = out * inputFeatureSize + i; + sum += static_cast<O>(input[inputIdx]) * static_cast<O>(weights[weightIdx]); + } + output[batch * outputFeatureSize + out] += alpha * sum; } } + } template <class I, class O, class W, class B> void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, const DimSize_t inputFeatureSize, const DimSize_t outputFeatureSize, + const float alpha_, + const float beta_, const void* input_, const void* originalInput_, const void* weight_, @@ -127,48 +83,63 @@ void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, void* biasesGrad_) { // FIXME: missing FC attributes as arguments - const I* input = static_cast<const I*>(input_); - const I* originalInput = static_cast<const I*>(originalInput_); - const W* weight = static_cast<const W*>(weight_); - O* output = static_cast<O*>(output_); - W* weightGrad = static_cast<W*>(weightGrad_); - B* biasesGrad = static_cast<B*>(biasesGrad_); - - - // bias grad - if (biasesGrad == nullptr) { // no bias - std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0)); - } else { - for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs - B sum{0}; + const I* outputGrad = static_cast<const I*>(input_); // dY + const I* originalInput = static_cast<const I*>(originalInput_); // X (Input in forward pass) + const W* weight = static_cast<const W*>(weight_); // W + O* inputGrad = static_cast<O*>(output_); // dX + W* weightGrad = static_cast<W*>(weightGrad_); // dW + B* biasesGrad = static_cast<B*>(biasesGrad_); // dB + + const O alpha = static_cast<O>(alpha_); + const O beta = 
static_cast<O>(beta_);
+
+ // Compute bias gradient: dB = beta * (sum of dY over the batch)
+ if (biasesGrad != nullptr) {
+ /* TODO: If we want to support batched Biases:
+ for (std::size_t b = 0; b < batchSize; ++b) {
+ for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+ biasesGrad[b * outputFeatureSize + o] = beta * outputGrad[b * outputFeatureSize + o];
+ }
+ }
+ */
+ for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+ O sum{0};
 for (std::size_t b = 0; b < batchSize; ++b) {
- sum += input[b*outputFeatureSize + o];
+ sum += outputGrad[b * outputFeatureSize + o];
 }
- biasesGrad[o] = sum;
+ biasesGrad[o] = beta * sum;
 }
 }
- // weight grad
+ // Compute weight gradient: dW = alpha * (dY^T * X)
 for (std::size_t o = 0; o < outputFeatureSize; ++o) {
 for (std::size_t c = 0; c < inputFeatureSize; ++c) {
- W sum{0};
+ O sum{0};
 for (std::size_t b = 0; b < batchSize; ++b) {
- sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
+ std::size_t inputIdx = b * inputFeatureSize + c;
+ std::size_t outputIdx = b * outputFeatureSize + o;
+ sum += originalInput[inputIdx] * outputGrad[outputIdx];
 }
- weightGrad[o*inputFeatureSize + c] = sum;
+ std::size_t weightIdx = o * inputFeatureSize + c;
+ weightGrad[weightIdx] = alpha * sum;
 }
 }
- // input grad
+
+ // Compute input gradient: dX = alpha * (dY * W^T)
 for (std::size_t b = 0; b < batchSize; ++b) {
 for (std::size_t c = 0; c < inputFeatureSize; ++c) {
 O sum{0};
 for (std::size_t o = 0; o < outputFeatureSize; ++o) {
- sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
+ std::size_t weightIdx = o * inputFeatureSize + c;
+ std::size_t outputIdx = b * outputFeatureSize + o;
+ sum += weight[weightIdx] * outputGrad[outputIdx];
 }
- output[b*inputFeatureSize + c] = sum;
+ std::size_t inputIdx = b * inputFeatureSize + c;
+ inputGrad[inputIdx] = alpha * sum;
 }
 }
+
 }
 
 // Kernels registration to implementation entry point
diff --git a/include/aidge/backend/cpu/operator/SumImpl.hpp 
b/include/aidge/backend/cpu/operator/SumImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..54d6897959ebd91462317628400a437dd18c9bf9 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SumImpl.hpp @@ -0,0 +1,36 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + + #ifndef AIDGE_CPU_OPERATOR_SUMIMPL_H_ + #define AIDGE_CPU_OPERATOR_SUMIMPL_H_ + + #include <cstddef> // std::size_t + #include <memory> // std::unique_ptr, std::make_unique + #include <string> + #include <vector> + + #include "aidge/backend/cpu/operator/OperatorImpl.hpp" + #include "aidge/operator/Sum.hpp" + #include "aidge/utils/Registrar.hpp" + #include "aidge/utils/Types.h" + + namespace Aidge { + // Operator implementation entry point for the backend + using SumImpl_cpu = OperatorImpl_cpu<Sum_Op, + void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)>; + + + // Implementation entry point registration to Operator + REGISTRAR(Sum_Op, "cpu", Aidge::SumImpl_cpu::create); + } // namespace Aidge + + #endif /* AIDGE_CPU_OPERATOR_SUMIMPL_H_ */ + \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/SumImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SumImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0c5e137eb7a428cc84cce83d9d078c47fa027c52 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SumImpl_kernels.hpp @@ -0,0 +1,59 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * 
This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+ #ifndef AIDGE_CPU_OPERATOR_SUMIMPL_KERNELS_H_
+ #define AIDGE_CPU_OPERATOR_SUMIMPL_KERNELS_H_
+
+ #include "aidge/utils/Registrar.hpp"
+
+ #include <cstdint>  // std::int32_t, std::int64_t
+
+ #include "aidge/backend/cpu/data/Broadcasting.hpp"
+ #include "aidge/backend/cpu/operator/SumImpl.hpp"
+
+ namespace Aidge {
+
+ template <class I, class O>
+ void SumImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const std::vector<std::vector<std::size_t>>& inputDims, const std::size_t outputLength, const std::vector<std::size_t>& outDims, void* output_) {
+ std::vector<const I*> inputs;
+ for (const auto& input_ : inputs_) {
+ inputs.push_back(static_cast<const I*>(input_));
+ }
+ O* output = static_cast<O*>(output_);
+
+ for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex)
+ {
+ output[oIndex] = 0;
+ std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex);
+ for(std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
+ std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes);
+ output[oIndex] += inputs[iIndex][idx];
+ }
+ }
+ }
+
+ // Kernels registration to implementation entry point
+ REGISTRAR(SumImpl_cpu,
+ {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
+ {ProdConso::inPlaceModel, Aidge::SumImpl_cpu_forward_kernel<float, float>, nullptr});
+ REGISTRAR(SumImpl_cpu,
+ {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
+ {ProdConso::inPlaceModel, Aidge::SumImpl_cpu_forward_kernel<double, double>, nullptr});
+ REGISTRAR(SumImpl_cpu,
+ {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
+ {ProdConso::inPlaceModel, 
Aidge::SumImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
+ REGISTRAR(SumImpl_cpu,
+ {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
+ {ProdConso::inPlaceModel, Aidge::SumImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
+ } // namespace Aidge
+
+ #endif /* AIDGE_CPU_OPERATOR_SUMIMPL_KERNELS_H_ */
+ 
\ No newline at end of file
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index 359452712f94be078122266089cc1da89baf50d5..821f673de74e2ec2a592e181912e0f912bef4767 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -40,12 +40,13 @@ void Aidge::FCImpl_cpu::forward()
 const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
 const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
 const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0))) : Tensor();
- // Call kernel
- const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
+ const auto batchSize = input0.size() /input1.dims()[1];
 impl.forward(batchSize,
 input1.dims()[1], // nb input features
 input1.dims()[0], // nb output features
+ op_.alpha(),
+ op_.beta(),
 input0.getImpl()->rawPtr(),
 input1.getImpl()->rawPtr(),
 (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr,
@@ -71,12 +72,14 @@ void Aidge::FCImpl_cpu::backward()
 const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
 const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
 const auto& input2grad = (op_.getInput(2)) ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))) : Tensor();
- // Call kernel
- const auto batchSize = (input0grad.dims().size() > 1) ? 
input0grad.dims()[0] : 1; + const DimSize_t nbInFeat = input1grad.dims()[1]; + const auto batchSize = input0grad.size() /nbInFeat; impl.backward(batchSize, - input1grad.dims()[1], // nb input features + nbInFeat, // nb input features input1grad.dims()[0], // nb output features + op_.alpha(), + op_.beta(), getCPUPtr(fc_grad), getCPUPtr(op_.getInput(0)), getCPUPtr(mOp.getRawInput(1)), diff --git a/src/operator/SumImpl.cpp b/src/operator/SumImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..436fd78cbc50efae92dd695b215ad5359cdc3530 --- /dev/null +++ b/src/operator/SumImpl.cpp @@ -0,0 +1,71 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + + #include "aidge/backend/cpu/operator/SumImpl.hpp" + + #include <cassert> + #include <vector> + + #include "aidge/backend/cpu/data/GetCPUPtr.h" + #include "aidge/backend/cpu/operator/SumImpl_kernels.hpp" + #include "aidge/data/Data.hpp" + #include "aidge/data/Tensor.hpp" + #include "aidge/utils/Types.h" + #include "aidge/utils/ErrorHandling.hpp" + +template <> +void Aidge::SumImpl_cpu::forward() { + const Sum_Op& op = static_cast<const Sum_Op&>(mOp); + // Check inputs + AIDGE_ASSERT(op.getInput(0), "missing input in Sum operator"); + AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Sum forward because input#0 has no implementation."); + DataType datatypeFirstInput = op.getInput(0)->dataType(); + for (IOIndex_t i = 1; i < op.nbInputs(); ++i) { + AIDGE_ASSERT(op.getInput(i), "missing input in Sum operator"); + AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run Sum forward because the input#{} has no implementation.", i); + 
AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot sum inputs with two different data types.");
+ }
+
+ // Find the correct kernel type
+ const auto impl = Registrar<SumImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
+ // Convert input data (no overhead if not needed!)
+ // TODO: right now, if needed, memory will be allocated/deallocated at each
+ // call to forward(). We might put the following shared_ptr as members of
+ // this class to avoid that.
+ const std::size_t nbDims = op.getOutput(0)->nbDims();
+ std::vector<std::vector<std::size_t>> inputsDims;
+ std::vector<const void*> opInputs;
+ std::vector<std::shared_ptr<Tensor>> inputsFallback(op.nbInputs());
+ for (IOIndex_t i = 0; i < op.nbInputs(); ++i) {
+ std::vector<std::size_t> inputDims(nbDims, 1);
+ auto dims = op.getInput(i)->dims();
+ for(std::size_t j=dims.size()-1; j+1>0; --j)
+ {
+ std::size_t idx = nbDims - (dims.size()-j);
+ inputDims[idx] = dims[j];
+ }
+ inputsDims.push_back(inputDims);
+ const auto& input = op.getInput(i)->refCastFrom(inputsFallback[i], *op.getOutput(0));
+ opInputs.push_back(input.getImpl()->rawPtr());
+ }
+
+ impl.forward(opInputs,
+ inputsDims,
+ op.getOutput(0)->size(),
+ op.getOutput(0)->dims(),
+ getCPUPtr(op.getRawOutput(0)));
+}
+
+
+template <>
+void Aidge::SumImpl_cpu::backward() {
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp
index 8ac0afc33152f4ae110b1c3ef0b4e88f37b00e99..a84ab63a1c866a16f2f3977be79f8803208d7856 100644
--- a/unit_tests/operator/Test_FCImpl.cpp
+++ b/unit_tests/operator/Test_FCImpl.cpp
@@ -19,6 +19,7 @@
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/FC.hpp"
 #include "aidge/utils/ArrayHelpers.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 
 using namespace Aidge;
 
@@ -48,7 +49,7 @@ TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") {
 Tensor myOutput = Array2D<int, 2, 5>{
 {{23601, 23602, 23603, 23604, 23605},
 {68601, 68602, 68603, 68604, 68605}}};
 
- std::shared_ptr<Node> myFC = FC(75, 5, false, "myfc"); + std::shared_ptr<Node> myFC = FC(75, 5, 1.0f, 1.0f, false, "myfc"); auto op = std::static_pointer_cast<FC_Op>(myFC -> getOperator()); op -> setDataType(DataType::Int32); op -> setBackend("cpu"); @@ -106,6 +107,66 @@ TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") { myFC->forward(); REQUIRE(*(op->getOutput(0)) == myOutput); } +} - // std::cout << static_cast<Tensor>((*myFC->getOperator())["weight"])[0][0][0][0] << std::endl; + +TEST_CASE("[cpu/oeprator] FC(backward)", "[FC][CPU]") { + SECTION("2D Input 1D Bias"){ + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>(Array2D<float, 4, 3>{ + { + {0.55043954, -0.080161572, 0.18495631}, + {-0.82497174, -0.95155114, 0.25449812}, + {1.6508394, 0.2518357, -0.49999624}, + {0.82770473, 0.28659272, -0.11644308} + }}); + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<float, 2, 3>{ + {{0.044322353, 1.9578923, -1.96035}, + { -1.1458585, -0.8235659, 0.24195994}}}); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float, 2>{{1.5327742, 0.90154403}}); + + Tensor myOutput = Tensor(Array2D<float, 4, 2>{ + {{1.0376441, 0.38158852}, + {-0.86573052, 2.6920884}, + {3.0791781, -1.3184667}, + {2.3588469, -0.31109101}}}); + + std::shared_ptr<Node> myFC = FC(3, 2, 1.0f, 1.0f, false, "myfc"); + auto op = std::static_pointer_cast<OperatorTensor>(myFC -> getOperator()); + op -> associateInput(0, myInput); + op -> associateInput(1, myWeights); + op -> associateInput(2, myBias); + op -> setDataType(DataType::Float32); + op -> setBackend("cpu"); + myFC->forward(); + + REQUIRE(approxEq<float>(*(op->getOutput(0)), myOutput)); + + // Backward + std::shared_ptr<Tensor> myOutputGrad = + std::make_shared<Tensor>(Array2D<float, 4, 2>{ + { + {1.373911, -1.2312084}, + {0.24750818, -0.71446633}, + {-1.5132738, -0.23136522}, + {0.20452768, -1.2200259} + }}); + Tensor expectedInputGrad = Tensor(Array2D<float, 4, 3>{ + { + {1.4716856, 3.7039511, 
-2.9912496}, + {0.82964748, 1.0730045, -0.65807492}, + {0.19803995, -2.7722826, 2.9105654}, + {1.4070423, 1.4052149, -0.69614327} + }}); + Tensor expectedWeightsGrad = Tensor(Array2D<float, 2, 3>{ + {{-1.7768159, -0.66813177, 1.0499192}, + {-1.4800593, 0.37063029, -0.15180479}}}); + Tensor expectedBiasGrad = Tensor(Array1D<float, 2>{{0.31267303, -3.397066 }}); + + op->getOutput(0)->setGrad(myOutputGrad); + myFC->backward(); + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), expectedInputGrad)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), expectedWeightsGrad)); + REQUIRE(approxEq<float>(*(op->getInput(2)->grad()), expectedBiasGrad)); + } } \ No newline at end of file diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp index 7b0b80d816eba8000e782e0e5238c2550dd4eed9..7c1718dd7679cdc924866eaad3d1ab2453db467d 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -702,8 +702,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { auto init = std::make_shared<Tensor>(Array2D<float, 2, 5>{}); uniformFiller<float>(init, 0.0, 0.0); - auto fc1 = FC(inChannels, outChannels, true, "myfc"); - auto fc2 = FC(outChannels, inChannels, true, "fc2"); + auto fc1 = FC(inChannels, outChannels, 1.0f, 1.0f, true, "myfc"); + auto fc2 = FC(outChannels, inChannels, 1.0f, 1.0f, true, "fc2"); // NOTE: Account for init step by adding 1 to the max timestep // parameter. 
auto lif1 = Leaky(nbTimeSteps + 1, beta, threshold, LeakyReset::Subtraction, "leaky"); diff --git a/unit_tests/operator/Test_SumImpl.cpp b/unit_tests/operator/Test_SumImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c3e81a90ffa3614a7f73c8fccb70a751e03ae691 --- /dev/null +++ b/unit_tests/operator/Test_SumImpl.cpp @@ -0,0 +1,176 @@ +/******************************************************************************** + * Copyright (c) 2025 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + + #include <catch2/catch_test_macros.hpp> + + #include "aidge/data/Tensor.hpp" + #include "aidge/operator/Sum.hpp" + + #include "aidge/backend/cpu.hpp" + + using namespace Aidge; + + TEST_CASE("[cpu/operator] Sum(forward)", "[Sum][CPU]") { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + {{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + {{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // + + SECTION("One input") { + std::shared_ptr<Node> mySum = Sum(1); + auto op = std::static_pointer_cast<OperatorTensor>(mySum -> getOperator()); + op->associateInput(0, input1); + op->setBackend("cpu"); + op->setDataType(DataType::Int32); + mySum->forward(); + + REQUIRE(*(op->getOutput(0)) == *input1); + } + + SECTION("Two inputs") { + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { + { + {{40, 94},{42, 96},{44, 98}}, + {{46, 100},{48, 102},{50, 104}}, 
+ {{52, 106},{54, 108},{56, 110}} + }, + { + {{58, 112},{60, 114},{62, 116}}, + {{64, 118},{66, 120},{68, 122}}, + {{70, 124},{72, 126},{74, 128}} + }, + { + {{76, 130},{78, 132},{80, 134}}, + {{82, 136},{84, 138},{86, 140}}, + {{88, 142},{90, 144},{92, 146}} + } + } + }); + + std::shared_ptr<Node> mySum = Sum(2); + auto op = std::static_pointer_cast<OperatorTensor>(mySum -> getOperator()); + op->associateInput(0, input1); + op->associateInput(1, input1); + op->setBackend("cpu"); + op->setDataType(DataType::Int32); + mySum->forward(); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + } + + SECTION("Three inputs") { + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { + { + {{ 60, 141},{ 63, 144},{ 66, 147}}, + {{ 69, 150},{ 72, 153},{ 75, 156}}, + {{ 78, 159},{ 81, 162},{ 84, 165}} + }, + { + {{ 87, 168},{ 90, 171},{ 93, 174}}, + {{ 96, 177},{ 99, 180},{102, 183}}, + {{105, 186},{108, 189},{111, 192}} + }, + { + {{114, 195},{117, 198},{120, 201}}, + {{123, 204},{126, 207},{129, 210}}, + {{132, 213},{135, 216},{138, 219}} + } + } + }); + + std::shared_ptr<Node> mySum = Sum(3); + auto op = std::static_pointer_cast<OperatorTensor>(mySum -> getOperator()); + op->associateInput(0, input1); + op->associateInput(1, input1); + op->associateInput(2, input1); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + mySum->forward(); + + REQUIRE(*op->getOutput(0) == *expectedOutput); + } + + SECTION("Broadcasting") { + std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<int,3,1,3,2> { + { // + { // + {{0, 1},{2, 3},{4, 5}} // + }, // + { // + {{6, 7},{8, 9},{10, 11}} // + }, // + { // + {{12, 13},{14, 15},{16, 17}} // + } // + } // + }); // + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{20, 21},{22, 23},{24, 25}}, // + {{26, 27},{28, 29},{30, 31}}, // + {{32, 33},{34, 35},{36, 37}} // + } // + } // + }); // + + std::shared_ptr<Tensor> input_2 = 
std::make_shared<Tensor>(Array1D<int,2> {{100,200}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{ 120, 222},{ 124, 226},{ 128, 230}}, // + {{ 126, 228},{ 130, 232},{ 134, 236}}, // + {{ 132, 234},{ 136, 238},{ 140, 242}} // + }, // + { // + {{ 126, 228},{ 130, 232},{ 134, 236}}, // + {{ 132, 234},{ 136, 238},{ 140, 242}}, // + {{ 138, 240},{ 142, 244},{ 146, 248}} // + }, // + { // + {{ 132, 234},{ 136, 238},{140, 242}}, // + {{ 138, 240},{ 142, 244},{146, 248}}, // + {{ 144, 246},{ 148, 250},{152, 254}} // + } // + } // + }); // + + std::shared_ptr<Node> mySum = Sum(3); + auto op = std::static_pointer_cast<OperatorTensor>(mySum -> getOperator()); + op->associateInput(0, input_0); + op->associateInput(1, input_1); + op->associateInput(2, input_2); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + mySum->forward(); + op->getOutput(0)->print(); + expectedOutput->print(); + REQUIRE(*op->getOutput(0) == *expectedOutput); + } + } + \ No newline at end of file diff --git a/unit_tests/scheduler/Test_CastMove.cpp b/unit_tests/scheduler/Test_CastMove.cpp index b78e864fecab1fd103a2cb30924d10a25f5b8f10..3f1538a44e5558bbe8530e0eea3f00cb3584795e 100644 --- a/unit_tests/scheduler/Test_CastMove.cpp +++ b/unit_tests/scheduler/Test_CastMove.cpp @@ -56,7 +56,7 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") { Conv(1, 3, {3, 3}, "conv1"), Conv(3, 4, {1, 1}, "conv2"), Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + FC(27, 5, 1.0f, 1.0f, false, "fc")}); g->getNode("conv1")->getOperator()->setInput(0, inputTensor); g->getNode("conv1")->getOperator()->setInput(1, weight1); @@ -158,7 +158,7 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") { Conv(1, 3, {3, 3}, "conv1"), Conv(3, 4, {1, 1}, "conv2"), Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + FC(27, 5, 1.0f, 1.0f, false, "fc")}); g->getNode("conv1")->getOperator()->setInput(0, inputTensor); 
g->getNode("conv1")->getOperator()->setInput(1, weight1); diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index be87e8ac77020b5c05469fb959752a66512e6ffb..1361b8a27a3ffca84f7248bf116298dba16a5bee 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -71,7 +71,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { Conv(1, 3, {3, 3}, "conv1"), Conv(3, 4, {1, 1}, "conv2"), Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + FC(27, 5, 1.0f, 1.0f, false, "fc")}); g->getNode("conv1")->getOperator()->setInput(0, inputTensor); g->getNode("conv1")->getOperator()->setInput(1, weight1); @@ -173,7 +173,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { Conv(3, 3, {1, 1}, "conv1.3")}), Add("add2"), Conv(3, 2, {1, 1}, "conv2"), - FC(18, 5, false, "out")}); + FC(18, 5, 1.0f, 1.0f, false, "out")}); g->getNode("inputConv")->getOperator()->setInput(0, inputTensor); g->getNode("inputConv")->getOperator()->setInput(1, weight1); @@ -321,7 +321,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { Conv(1, 3, {3, 3}, "conv1"), Conv(3, 4, {1, 1}, "conv2"), Conv(4, 3, {1, 1}, "conv3"), - FC(27, 5, false, "fc")}); + FC(27, 5, 1.0f, 1.0f, false, "fc")}); // g->getNode("conv1")->getOperator()->setInput(0, inputTensor); g->getNode("conv1")->getOperator()->setInput(1, weight1);