From 393fb207a6599cdfbbbe141e3cb29a3a5cae8246 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 26 Feb 2025 14:48:17 +0000
Subject: [PATCH 1/2] [upd] ConstantOfShape kernel to use Tensor as inputs and
 avoid redundant size computation

---
 .../cpu/operator/ConstantOfShapeImpl.hpp        |  8 +++-----
 .../operator/ConstantOfShapeImpl_kernels.hpp    | 17 ++++-------------
 src/operator/ConstantOfShapeImpl.cpp            |  9 +++------
 3 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp
index 83e7e030..b595ec93 100644
--- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp
@@ -12,23 +12,21 @@
 #ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
 #define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
 
-#include <cstddef>
 #include <memory>
-#include <vector>
 
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/ConstantOfShape.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
 
 namespace Aidge {
+
+class Tensor;  // forward declaration: the alias below only names Tensor through a reference / shared_ptr
 // Operator implementation entry point for the backend
 using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op,
-    void(const std::vector<DimSize_t>, const Tensor&, void *)>;
+    void(const std::shared_ptr<Tensor>&, const Tensor&)>;  // forward kernel signature: (output tensor, fill value)
 
 // Implementation entry point registration to Operator
 REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create);
 } // namespace Aidge
 
 #endif /* _AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */
-
diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp
index 18ab9c0a..c42cc76a 100644
--- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp
@@ -30,20 +30,11 @@
 namespace Aidge {
 template <class O>
 void ConstantOfShapeimpl_cpu_forward_kernel(
-    const std::vector<DimSize_t> output_dims, const Tensor &value,
-    void *output_) {
+    const std::shared_ptr<Tensor>& output_, const Tensor &value) {  // fills output_ with the first element of value
 
-  O *output = static_cast<O *>(output_);
-  O val;
-  std::copy(static_cast<O *>(value.getImpl()->hostPtr()),
-            static_cast<O *>(value.getImpl()->hostPtr()) +
-                static_cast<NbElts_t>(1),
-            &val);
-  const size_t output_size = std::accumulate(
-      output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>());
-  for (size_t i = 0; i < output_size; ++i) {
-    output[i] = val;
-  }
+  O* output = static_cast<O*>(output_->getImpl()->hostPtr());  // host buffer of the output tensor, viewed as O
+  const O val = *static_cast<const O*>(value.getImpl()->hostPtr());  // scalar fill value: only element 0 of value is read
+  std::fill_n(output, output_->size(), val);  // one write per output element, count taken from the tensor itself
 }
 
 // Kernels registration to implementation entry point
diff --git a/src/operator/ConstantOfShapeImpl.cpp b/src/operator/ConstantOfShapeImpl.cpp
index 16e4b762..1d41160b 100644
--- a/src/operator/ConstantOfShapeImpl.cpp
+++ b/src/operator/ConstantOfShapeImpl.cpp
@@ -13,15 +13,14 @@
 
 #include <functional>
 #include <memory>
-#include <vector>
+#include <stdexcept>   // std::runtime_error
 
 #include "aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp"
-#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/ConstantOfShape.hpp"
+#include "aidge/backend/OperatorImpl.hpp"  // Aidge::getBestMatch, Aidge::getRequiredSpec
 #include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
 
 template <>
 void Aidge::ConstantOfShapeImpl_cpu::forward() {
@@ -33,9 +32,7 @@ void Aidge::ConstantOfShapeImpl_cpu::forward() {
     const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.forward(op_.getOutput(0)->dims(),
-             op_.value(), 
-             op_.getOutput(0)->getImpl()->rawPtr());
+    impl.forward(op_.getOutput(0), op_.value());
 }
 
 template <>
-- 
GitLab


From 9d9647aa0f91f637c5cd063b78b8a68075c2294e Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 26 Feb 2025 14:51:38 +0000
Subject: [PATCH 2/2] [upd] tests following 'aidge_core' changes

---
 .../operator/Test_ConstantOfShapeImpl.cpp     | 139 +++++++++---------
 .../recipies/Test_FoldConstantOfShape.cpp     |  50 +++++++
 2 files changed, 119 insertions(+), 70 deletions(-)
 create mode 100644 unit_tests/recipies/Test_FoldConstantOfShape.cpp

diff --git a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
index 8ec1669b..6833d836 100644
--- a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
+++ b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
@@ -27,89 +27,88 @@
 #include "aidge/data/Tensor.hpp"
 #include "aidge/filler/Filler.hpp"
 #include "aidge/operator/ConstantOfShape.hpp"
-#include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
-TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") {
-  constexpr std::uint16_t NBTRIALS = 10;
-  // Create a random number generator
-  auto random_seed = Catch::Generators::Detail::getSeed;
-  std::mt19937 gen(random_seed());
-  std::uniform_real_distribution<float> valueDist(
-      0.1f, 1.1f); // Random float distribution between 0 and 1
-  std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
-      std::size_t(1), std::size_t(10));
-  std::uniform_int_distribution<int64_t> input_tensor_values_dist(
-      std::size_t(1), std::size_t(7));
-  std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.);
 
-  ///////////////////////////////////////////////
-  // SETUP FUNCTIONS
-  auto generate_input_tensor =
-      [&gen, &input_tensor_size_dist,
-       &input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
-    std::vector<DimSize_t> input_dims;
-    input_dims.push_back(input_tensor_size_dist(gen));
+TEST_CASE("[cpu/operator] ConstantOfShape(forward)", "[ConstantOfShape][CPU][forward]") {
+    constexpr std::uint16_t NBTRIALS = 10;
+    // Create a random number generator
+    auto random_seed = Catch::Generators::Detail::getSeed;
+    std::mt19937 gen(random_seed());
+    std::uniform_real_distribution<float> valueDist(
+            0.1f, 1.1f); // Random float distribution over [0.1, 1.1)
+    std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
+            std::size_t(1), std::size_t(10));
+    std::uniform_int_distribution<int64_t> input_tensor_values_dist(
+            std::size_t(1), std::size_t(7));
+    std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.);
 
-    auto result = std::make_shared<Tensor>(input_dims);
-    result->setDataType(DataType::Int64);
-    result->setBackend("cpu");
-    for (DimSize_t i = 0; i < result->size(); ++i) {
-      result->set<std::int64_t>(i, input_tensor_values_dist(gen));
-    }
-    return result;
-  };
+    ///////////////////////////////////////////////
+    // SETUP FUNCTIONS
+    auto generate_input_tensor =
+            [&gen, &input_tensor_size_dist,
+             &input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
+        std::vector<DimSize_t> input_dims;
+        input_dims.push_back(input_tensor_size_dist(gen));
 
-  auto generate_random_operator =
-      [&gen,
-       &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
-    auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen)));
-    auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
-    op->setDataType(DataType::Float64);
-    op->setBackend("cpu");
-    return op;
-  };
+        auto result = std::make_shared<Tensor>(input_dims);
+        result->setDataType(DataType::Int64);
+        result->setBackend("cpu");
+        for (DimSize_t i = 0; i < result->size(); ++i) {
+            result->set<std::int64_t>(i, input_tensor_values_dist(gen));
+        }
+        return result;
+    };
 
-  auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
-                                   std::shared_ptr<ConstantOfShape_Op> op) {
-    std::vector<DimSize_t> output_dims;
-    output_dims.reserve(input_tensor->size());
-    for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
-      output_dims.push_back(input_tensor->get<int64_t>(i));
-    }
-    auto result = std::make_shared<Tensor>(output_dims);
-    result->setDataType(op->value().dataType());
-    result->setBackend("cpu");
-    constantFiller(result, op->value().get<double>(0));
-    return result;
-  };
+    auto generate_random_operator =  // builds a ConstantOfShape_Op whose fill value is one random double
+            [&gen,
+             &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
+        std::shared_ptr<ConstantOfShape_Op> op = std::make_shared<ConstantOfShape_Op>(Tensor(operator_attr_value_dist(gen)));
+        op->setDataType(DataType::Float64);  // matches the double drawn above
+        op->setBackend("cpu");
+        return op;
+    };
+
+    auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
+                                      std::shared_ptr<ConstantOfShape_Op> op) {  // builds the expected output for this (shape tensor, operator) pair
+        std::vector<DimSize_t> output_dims;
+        output_dims.reserve(input_tensor->size());
+        for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
+            output_dims.push_back(input_tensor->get<std::int64_t>(i));  // each input element becomes one output dimension
+        }
+        auto result = std::make_shared<Tensor>(output_dims);
+        result->setDataType(op->value().dataType());
+        result->setBackend("cpu");
+        constantFiller(result, op->value().get<double>(0));  // reads the fill value as double; operators in this test are set to Float64
+        return result;
+    };
 
-  /////////////////////////////////////
-  // BENCHMARKING
-  std::chrono::time_point<std::chrono::system_clock> start;
-  std::chrono::time_point<std::chrono::system_clock> end;
-  std::chrono::duration<double, std::micro> duration{};
-  int number_of_operation{0};
+    /////////////////////////////////////
+    // BENCHMARKING
+    std::chrono::time_point<std::chrono::system_clock> start;
+    std::chrono::time_point<std::chrono::system_clock> end;
+    std::chrono::duration<double, std::micro> duration{};
+    int number_of_operation{0};
 
-  SECTION("ConstantOfShapeImpl_cpu::forward()") {
-    for (int i = 0; i < NBTRIALS; ++i) {
-      auto input_T = generate_input_tensor();
-      std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator();
-      auto output_T = generate_output_tensor(input_T, op);
-      op->associateInput(0, input_T);
+    SECTION("ConstantOfShapeImpl_cpu::forward()") {  // random (shape, value) pairs: forward() output vs. independently built tensor
+        for (int trial = 0; trial < NBTRIALS; ++trial) {  // distinct name: the dims loop below declares its own `i`
+            auto input_T = generate_input_tensor();
+            std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator();
+            auto output_T = generate_output_tensor(input_T, op);  // expected result, built without calling the operator
+            op->associateInput(0, input_T);
 
-      REQUIRE(op->forwardDims(true));
-      REQUIRE_NOTHROW(op->forward());
+            REQUIRE(op->forwardDims(true));
+            REQUIRE_NOTHROW(op->forward());
 
-      CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
-      for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
-        CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i));
-      }
-      CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
+            CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
+            for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
+                CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i));
+            }
+            CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
+        }
     }
-  }
 }
 } // namespace Aidge
 
diff --git a/unit_tests/recipies/Test_FoldConstantOfShape.cpp b/unit_tests/recipies/Test_FoldConstantOfShape.cpp
new file mode 100644
index 00000000..a1c09b15
--- /dev/null
+++ b/unit_tests/recipies/Test_FoldConstantOfShape.cpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+ #include "aidge/graph/GraphView.hpp"
+ #include "aidge/operator/Identity.hpp"
+ #include "aidge/recipes/Recipes.hpp"
+
+ #include <cstdint>  // std::int64_t
+ #include <memory>
+
+ #include <catch2/catch_test_macros.hpp>
+
+ #include "aidge/graph/OpArgs.hpp"
+ #include "aidge/operator/ConstantOfShape.hpp"
+ #include "aidge/operator/Conv.hpp"
+ #include "aidge/operator/Producer.hpp"
+ #include "aidge/operator/ReLU.hpp"
+ #include "aidge/recipes/Recipes.hpp"
+ #include "aidge/utils/ArrayHelpers.hpp"
+ #include "aidge/utils/Types.h"
+
+ namespace Aidge {
+
+ TEST_CASE("[cpu/recipes] foldConstantOfShape",
+           "[ConstantOfShape][foldConstantOfShape][recipes]") {
+   auto input_T = std::make_shared<Tensor>(Array1D<std::int64_t, 4>({1, 1, 3, 3}));  // 1-D int64 tensor {1, 1, 3, 3}
+
+   auto model = std::make_shared<GraphView>();
+   SECTION("Sequential model") {
+     model = Sequential({
+         Producer(input_T, "prod_0", true),  // NOTE(review): third argument looks like a "constant" flag - confirm against Producer()
+         ConstantOfShape(3, "constantOfShape_0"),
+         Conv(1, 1, {3, 3}, "Conv_0"),
+         ReLU("ReLU_1")
+     });
+     // The CPU backend (aidge_backend_cpu) is loaded, so the recipe is expected to succeed.
+     REQUIRE(foldConstantOfShape(model) == 1);  // presumably the return value counts folded nodes - confirm; one is expected here
+     CHECK(model->forwardDims());
+   }
+ }
+
+ }  // namespace Aidge
-- 
GitLab