diff --git a/.gitignore b/.gitignore
index 18f1583283b848051e2915895eae6a62751a9d0b..ba5c59398b68083c6c1c5fe820fb9070d999c18e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,17 +4,16 @@
 # C++ Build
 build*/
 install*/
+include/aidge/backend/quantization_version.h
 
 # VSCode
 .vscode
 
 # Python
-aidge_quantization/_version.py
 *.so
 __pycache__
 *.pyc
 *.egg-info
-aidge_quantization/_version.py
 wheelhouse/*
 
 # Mermaid
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 905a2a25c639b2186ddd6ab4b8737ff00c7ed4aa..b3c6d459dfaf29f5accbc0be4565a3709e9ffd3b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,33 +1,65 @@
 # CMake >= 3.18 is required for good support of FindCUDAToolkit
-cmake_minimum_required(VERSION 3.18) # XXX 3.18
-set(CXX_STANDARD 14)
+cmake_minimum_required(VERSION 3.18)
 
-file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
-file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
 
+# Read project metadata
+file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
 message(STATUS "Project name: ${project}")
+
+file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
+# Parse version.txt to retrieve Major, Minor and Patch
+string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
+set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
+set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
+set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})
 message(STATUS "Project version: ${version}")
+
+# Retrieve latest git commit
+execute_process(
+    COMMAND git rev-parse --short HEAD
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    OUTPUT_VARIABLE GIT_COMMIT_HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
+message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
+
+
 project(${project}
         VERSION ${version}
         DESCRIPTION "Quantization methods for the Aidge framework."
         LANGUAGES CXX)
 
-# Note : project name is {project} and python module name is also {project}
-set(module_name _${project}) # target name
-set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings
+if(NOT $ENV{AIDGE_INSTALL} STREQUAL "")
+    set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL})
+    list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL})
+    message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH"
+                    "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}"
+                    "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}")
+endif()
 
-set(CXX_STANDARD 14)
+message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h")
+
+
+# Note: Using configure_file later in the code makes the version variables get lost...
+# I tried to set them in the internal cache but it failed.
+# The current code works, but there might be a scope issue.
+# Generate version.h file from config file version.h.in +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/version.h.in" + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h" +) -############################################## -# Import utils CMakeLists -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") ############################################## # Define options -option(PYBIND "python binding" ON) +option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) -option(TEST "Enable tests" ON) +option(TEST "Enable tests" OFF) option(COVERAGE "Enable coverage" OFF) option(CUDA "Enable CUDA backend" OFF) # XXX OFF option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF) @@ -35,85 +67,55 @@ option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memor ############################################## # Import utils CMakeLists set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -include(PybindModuleCreation) if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) Include(CodeCoverage) endif() +# Set variables if(CUDA) enable_language(CUDA) - message(STATUS "Cuda compiler version = ${CMAKE_CUDA_COMPILER_VERSION}") # Define a preprocessor macro with the Cuda compiler version add_definitions(-DCUDA_COMPILER_VERSION="${CMAKE_CUDA_COMPILER_VERSION}") endif() -if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") - set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL}) - list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL}) - message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH" - "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}" - "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}") +# Source files +if(CUDA) + file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu") +else() + file(GLOB_RECURSE src_files "src/*.cpp") endif() -# ############################################## -# Find system dependencies -Include(FetchContent) - -FetchContent_Declare( - fmt - GIT_REPOSITORY https://github.com/fmtlib/fmt.git - GIT_TAG 10.2.1 # or a later release -) - -set(FMT_SYSTEM_HEADERS ON) -FetchContent_MakeAvailable(fmt) -set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) +# Header files +file(GLOB_RECURSE inc_files "include/*.hpp") -if(CUDA) - find_package(CUDAToolkit REQUIRED) -endif() +# Note: cxx project name is {CMAKE_PROJECT_NAME} and python module name is also {CMAKE_PROJECT_NAME} +set(module_name _${CMAKE_PROJECT_NAME}) # target name +add_library(${module_name} ${src_files} ${inc_files}) +set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings -############################################## -# Find system dependencies +# Dependencies and linking find_package(aidge_core REQUIRED) find_package(aidge_backend_cpu REQUIRED) +target_link_libraries(${module_name} + PUBLIC + _aidge_core + _aidge_backend_cpu +) if(CUDA) + find_package(CUDAToolkit REQUIRED) find_package(aidge_backend_cuda REQUIRED) -endif() - -############################################## -# Create target and set properties - -if(CUDA) - file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} ${inc_files}) target_link_libraries(${module_name} PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - # _aidge_backend_cuda # XXX CUDA::cudart CUDA::cublas cudnn ) -else() - 
file(GLOB_RECURSE src_files "src/*.cpp") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} ${inc_files}) - target_link_libraries(${module_name} - PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - ) endif() -#Set target properties +# Include directories target_include_directories(${module_name} PUBLIC $<INSTALL_INTERFACE:include> @@ -122,6 +124,7 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) +# Compilation settings if(CUDA) if(NOT DEFINED CMAKE_CUDA_STANDARD) set(CMAKE_CUDA_STANDARD 14) @@ -142,23 +145,44 @@ if (PYBIND) generate_python_binding(${pybind_module_name} ${module_name}) endif() -# XXX HERE !!! -target_link_libraries(${module_name} PUBLIC fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) +target_link_libraries(${module_name} PRIVATE fmt::fmt) +#################################### +# Compilation options and warnings target_compile_options(${module_name} PRIVATE + # Options for Clang, AppleClang, and GCC compilers $<$<COMPILE_LANGUAGE:CPP>:$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: - -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>>) + -Wall # Enable all warnings + -Wextra # Enable extra warnings + -Wold-style-cast # Warn about C-style casts + -Winline # Warn if inline expansion fails + -pedantic # Enforce strict ISO C++ standards + -Werror=narrowing # Treat narrowing conversions as errors + -Wshadow # Warn about variable shadowing + $<$<BOOL:${WERROR}>:-Werror> # Optionally treat warnings as errors + >> +) + +# Additional MSVC-specific warning level +target_compile_options(${module_name} PRIVATE + $<$<CXX_COMPILER_ID:MSVC>: + /W4 # Warning level 4 (highest for MSVC) + > +) + +# CUDA-specific compile options if(CUDA) target_compile_options(${module_name} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: - -Wall>) + -Wall # Enable all warnings for CUDA + > + ) endif() -target_compile_options(${module_name} PRIVATE - $<$<CXX_COMPILER_ID:MSVC>: - /W4>) +# Coverage flags for GCC if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + include(CodeCoverage) append_coverage_compiler_flags() endif() @@ -168,29 +192,31 @@ endif() include(GNUInstallDirs) set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project}) +# Install the library target install(TARGETS ${module_name} EXPORT ${project}-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) +# Install header files install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -#Export the targets to a script - +# Export targets for other projects to use install(EXPORT ${project}-targets - FILE "${project}-targets.cmake" - DESTINATION ${INSTALL_CONFIGDIR} - COMPONENT ${module_name} + FILE "${project}-targets.cmake" + DESTINATION ${INSTALL_CONFIGDIR} + COMPONENT ${module_name} ) -if (PYBIND) +# Python binding installation +if(PYBIND) install(TARGETS ${pybind_module_name} DESTINATION ${PYBIND_INSTALL_PREFIX} ) endif() -#Create a ConfigVersion.cmake file +# Create and install CMake configuration files include(CMakePackageConfigHelpers) write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" @@ -203,15 +229,14 @@ 
configure_package_config_file("${project}-config.cmake.in" INSTALL_DESTINATION ${INSTALL_CONFIGDIR} ) -#Install the config, configversion and custom find modules +# Install CMake configuration files install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" DESTINATION ${INSTALL_CONFIGDIR} ) -############################################## -## Exporting from the build tree +# Export from build tree export(EXPORT ${project}-targets FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake") @@ -219,10 +244,6 @@ export(EXPORT ${project}-targets ############################################## ## Add test if(TEST) - if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED) - message(WARNING "Skipping compilation of tests: missing Python embedded interpreter") - else() - enable_testing() - add_subdirectory(unit_tests) - endif() + enable_testing() + add_subdirectory(unit_tests) endif() diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py index dfdedd8394913c0b205bbb1084b4dfb3c95b24a3..56080bff0d1f4a95248fa983316dbafd35565501 100644 --- a/aidge_quantization/unit_tests/test_ptq.py +++ b/aidge_quantization/unit_tests/test_ptq.py @@ -21,7 +21,7 @@ ACCURACIES = (95.4, 94.4) # (97.9, 97.7) NB_BITS = 4 # -------------------------------------------------------------- -# UTILS +# UTILS # -------------------------------------------------------------- def propagate(model, scheduler, sample): @@ -50,7 +50,7 @@ def compute_accuracy(model, samples, labels): # -------------------------------------------------------------- class test_ptq(unittest.TestCase): - + def setUp(self): # load the samples / labels (numpy) @@ -70,19 +70,20 @@ class test_ptq(unittest.TestCase): def tearDown(self): pass - + def test_model(self): Log.set_console_level(Level.Info) # compute the base accuracy accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels) self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1) - + def test_quant_model(self): - Log.set_console_level(Level.Info) + Log.set_console_level(Level.Debug) # create the calibration dataset + tensors = [] for sample in self.samples[0:NB_SAMPLES]: sample = prepare_sample(sample) @@ -91,14 +92,13 @@ class test_ptq(unittest.TestCase): # quantize the model - aidge_quantization.quantize_network( - self.model, - NB_BITS, - tensors, - clipping_mode=aidge_quantization.Clipping.MSE, + self.model, + NB_BITS, + tensors, + clipping_mode=aidge_quantization.Clipping.MSE, no_quantization=False, - optimize_signs=True, + optimize_signs=True, single_shift=False ) diff --git a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp index ddb820997837ec9b3603c6007497c8161145d587..1ed05e232ba9f8332c372a9524edd26fc7d9c45a 100644 --- a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp @@ -67,16 +67,16 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI fullPrecScale_4 = input[4*i+3] / stepSize[0]; /*****************Features Gradient Computation********************/ // STE method is simply applied - grad_input[4*i] = grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i] += grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_1 >= static_cast<GI>(range.second)) ? 
GI(0.0) : GI(1.0)); - grad_input[4*i+1] = grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+1] += grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_2 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+2] = grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+2] += grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_3 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+3] = grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+3] += grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_4 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); @@ -105,7 +105,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, // Process remaining for(unsigned int i=inputLength-inputLength%4; i<inputLength; ++i) { const GI fullPrecScale = input[i] / stepSize[0]; - grad_input[i] = grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[i] += grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); GI qData = fullPrecScale; @@ -117,7 +117,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI gradScaleFactor = static_cast<GI>(1.0f / std::sqrt(inputLength * range.second)); // 3rd: Multiply Step Size gradient with scale factor - grad_stepSize[0] = diffStepSize * gradScaleFactor; + grad_stepSize[0] += diffStepSize * gradScaleFactor; } diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp index 96a52b4592bc05f34a47e04e664df27847a48e85..3d46dcfacc59e98ae193a9238a9474c6df015b7d 100644 --- a/include/aidge/operator/FixedQ.hpp +++ b/include/aidge/operator/FixedQ.hpp @@ -9,11 +9,12 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_ -#define AIDGE_CORE_OPERATOR_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ -#include <cassert> +#include <cstddef> // std::size_t #include <memory> +#include <string> #include <vector> #include "aidge/backend/OperatorImpl.hpp" @@ -21,8 +22,8 @@ #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" #include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" namespace Aidge { @@ -43,24 +44,20 @@ private: public: - FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) : - OperatorTensor(Type, {InputCategory::Data}, 1), - mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) + FixedQ_Op(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false) : + OperatorTensor(Type, {InputCategory::Data}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<FixedQAttr::NbBits>(nbBits), + attr<FixedQAttr::Span>(span), + attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) {} /** - * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @brief Copy-constructor. 
Copy the operator attributes and its output + * tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - FixedQ_Op(const FixedQ_Op& op) - : OperatorTensor(op), mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + FixedQ_Op(const FixedQ_Op& op); /** * @brief Clone the operator using its copy-constructor. @@ -88,14 +85,16 @@ public: }; -inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); -} -} +std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, + float span = 4.0f, + bool isOutputUnsigned = false, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"}; } -#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ */ diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp index 10ceb81b0346cd5c15e19460cd44923d6e062f76..970c476cb7be18b8d001edb27d60079de85b9349 100644 --- a/include/aidge/operator/LSQ.hpp +++ b/include/aidge/operator/LSQ.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_LSQ_H_ -#define AIDGE_CORE_OPERATOR_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ #include <cassert> #include <memory> @@ -95,7 +95,7 @@ public: */ inline std::shared_ptr<Node> LSQ(const std::pair<int, int>& range = {0, 255}, const std::string& name = "") { auto lsq = std::make_shared<Node>(std::make_shared<LSQ_Op>(range), name); - addProducer(lsq, 1, {1}, "ss"); + addProducer<1>(lsq, 1, {1}, "ss"); return lsq; } } @@ -105,4 +105,4 @@ template <> const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"}; } -#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ */ diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp index 92ce1677b1b28e303c8488b55dd00cfafb519457..d168c38bf4f21a64f0007f2f65b0dfc4820d8297 100644 --- a/include/aidge/operator/SAT/DoReFa.hpp +++ b/include/aidge/operator/SAT/DoReFa.hpp @@ -9,17 +9,15 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_ -#define AIDGE_CORE_OPERATOR_DOREFA_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ -#include <cassert> #include <memory> #include <vector> #include "aidge/backend/OperatorImpl.hpp" #include "aidge/graph/Node.hpp" #include "aidge/operator/OperatorTensor.hpp" -#include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/StaticAttributes.hpp" #include "aidge/utils/Types.h" @@ -43,12 +41,17 @@ public: static const std::string Type; private: - using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>; + using Attributes_ = StaticAttributes<DoReFaAttr, std::size_t, DoReFaMode>; template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>; const std::shared_ptr<Attributes_> mAttributes; public: - DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default) + /** + * @brief Constructor for DoReFa_Op + * @param range The quantization range (default: 
255) + * @param mode The quantization mode (default: Default) + */ + DoReFa_Op(std::size_t range = 255, DoReFaMode mode = DoReFaMode::Default) : OperatorTensor(Type, {InputCategory::Param}, 1), mAttributes(std::make_shared<Attributes_>( attr<DoReFaAttr::Range>(range), @@ -59,30 +62,34 @@ public: * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - DoReFa_Op(const DoReFa_Op& op) - : OperatorTensor(op), - mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + DoReFa_Op(const DoReFa_Op& op); /** * @brief Clone the operator using its copy-constructor. * @see Operator::DoReFa_Op + * @return std::shared_ptr<Operator> A deep copy of the operator */ - std::shared_ptr<Operator> clone() const override { - return std::make_shared<DoReFa_Op>(*this); - } + std::shared_ptr<Operator> clone() const override; + /** + * @brief Get available backends for this operator + * @return std::set<std::string> Set of supported backend names + */ std::set<std::string> getAvailableBackends() const override final; + + /** + * @brief Set the backend for this operator + * @param name Backend name + * @param device Device index (default: 0) + */ void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + /** + * @brief Get operator attributes + * @return std::shared_ptr<Attributes> Shared pointer to operator attributes + */ inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } - inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } + inline std::size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); } static const std::vector<std::string> getInputsName(){ @@ -93,10 +100,20 @@ public: } }; -inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); -} -} +/** + * @brief Factory function to create a DoReFa operator node + * + * @param range Quantization range (default: 255) + * @param mode Quantization mode (default: Default) + * @param name Node name (default: empty) + * + * @return std::shared_ptr<Node> Shared pointer to the created node + */ +std::shared_ptr<Node> DoReFa(std::size_t range = 255, + DoReFaMode mode = DoReFaMode::Default, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> @@ -106,4 +123,4 @@ template <> const char *const EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"}; } -#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ */ diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp index def43b872c021e539efe5658b592ceec9b3b5d4d..9d99d7024905332ff7336c62aaaa14d09c51e6d1 100644 --- a/include/aidge/operator/SAT/TanhClamp.hpp +++ b/include/aidge/operator/SAT/TanhClamp.hpp @@ -9,20 +9,18 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_ -#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ -#include <cassert> 
 #include <memory>
+#include <set>
+#include <string>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
-#include "aidge/operator/Producer.hpp"
-#include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/StaticAttributes.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
@@ -44,23 +42,13 @@ public:
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    TanhClamp_Op(const TanhClamp_Op& op)
-        : OperatorTensor(op)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    TanhClamp_Op(const TanhClamp_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::TanhClamp_Op
      */
-    std::shared_ptr<Operator> clone() const override {
-        return std::make_shared<TanhClamp_Op>(*this);
-    }
+    std::shared_ptr<Operator> clone() const override;
 
     bool forwardDims(bool allowDataDependency = false) override final;
     std::set<std::string> getAvailableBackends() const override final;
@@ -75,9 +63,8 @@ public:
     }
 };
 
-inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name);
-}
-}
+std::shared_ptr<Node> TanhClamp(const std::string& name = "");
+
+} // namespace Aidge
 
-#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ */
diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp
index d94b6e930209450bc5d33331832b81a2623c56a7..f4dc073ee5ed02799a75505a2dc0a3a519e66548 100644
--- a/include/aidge/quantization/PTQ/CLE.hpp
+++ b/include/aidge/quantization/PTQ/CLE.hpp
@@ -9,29 +9,33 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLE_H_
-#define AIDGE_QUANTIZATION_PTQ_CLE_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <memory>
 
-#include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 
 namespace Aidge {
 
 /**
- * @brief Equalize the ranges of the nodes parameters by proceding iteratively.
- * Can only be applied to single branch networks (otherwise does not edit the graphView).
+ * @brief Equalize the ranges of the nodes' parameters by proceeding iteratively.
+ * Can only be applied to single branch networks (otherwise does not edit the GraphView).
+ *
+ * Cross Layer Equalization (CLE) is used to balance the weights between consecutive
+ * layers to improve quantization performance. It works by iteratively scaling weights
+ * and biases of adjacent layers while preserving the overall function of the network.
+ *
+ * @note The operation modifies weights and biases in-place but preserves the mathematical
+ * function computed by the network.
+ *
  * @param graphView The GraphView to process.
- * @param targetDelta the stopping criterion (typical value : 0.01)
+ * @param targetDelta the stopping criterion (typical value: 0.01). Smaller values lead
+ * to more precise equalization but may require more iterations.
  */
-    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta = 0.01);
+    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01);
 
-}
+} // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_PTQ_CLE_H_ */
\ No newline at end of file
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ */
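The CLE rescaling rule documented above is compact enough to sketch. The following NumPy snippet is an illustrative sketch only (equalize_pair is a hypothetical helper, not part of the Aidge API); it mirrors the arithmetic implemented in src/PTQ/CLE.cpp further down in this diff:

    import numpy as np

    def equalize_pair(w1, b1, w2):
        # Hypothetical helper: one CLE step over two consecutive affine layers,
        # mirroring the loop body of crossLayerEqualization in src/PTQ/CLE.cpp.
        r1 = np.abs(w1).max()         # absolute max of the first weight tensor
        r2 = np.abs(w2).max()         # absolute max of the second weight tensor
        s1 = np.sqrt(r1 * r2) / r1    # scale applied to layer 1 (weights and bias)
        s2 = np.sqrt(r1 * r2) / r2    # scale applied to layer 2 (weights)
        # s1 * s2 == 1, so the composed function is preserved (for positively
        # homogeneous activations such as ReLU) while both ranges become sqrt(r1*r2).
        return w1 * s1, b1 * s1, w2 * s2, abs(r1 - r2)

The implementation repeats this step over all adjacent affine pairs until the largest range delta falls below targetDelta.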
diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp
index 08a0b0ade5fdec76dea5b222884fbbe6f206c138..3f65c42eb2032da10c4d337b53fb1bdd08a7aa55 100644
--- a/include/aidge/quantization/PTQ/Clipping.hpp
+++ b/include/aidge/quantization/PTQ/Clipping.hpp
@@ -9,14 +9,14 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLIP_H_
-#define AIDGE_QUANTIZATION_PTQ_CLIP_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint>  // std::uint8_t
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
@@ -36,7 +36,7 @@ namespace Aidge
      * @param inputDataSet The input dataset, consisting of a vector of input samples.
      * @return A map associating each node name to it's corresponding activation histogram.
      */
-    std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda);
+    std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda);
 
     /**
      * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the Lp norm.
     * @param histogram: The provided activation histogram.
     * @param exponent: The exponent of the Lp norm (e.g. 2 for the MSE).
     * @return The optimal clipping value.
     */
-    float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent);
+    double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent);
 
     /**
     * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the KL divergence.
     * @param histogram: The provided activation histogram.
     * @param nbBits: The quantization number of bits.
     * @return The optimal clipping value.
     */
-    float computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
+    double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
 
     /**
-     * @brief Return a corrected map of the provided activation ranges. 
-     * To do so compute the optimal clipping values for every node and multiply the input ranges by those values. 
-     * The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'. 
+     * @brief Return a corrected map of the provided activation ranges.
+     * To do so compute the optimal clipping values for every node and multiply the input ranges by those values.
+     * The method used to compute the clippings can be either 'MSE', 'AA', 'KL' or 'MAX'.
      * @param clippingMode The method used to compute the optimal clippings.
     * @param valueRanges The map associating each affine node to its output range.
     * @param nbBits The quantization number of bits.
     * @param graphView The GraphView under test.
     * @param inputDataSet The input dataset, consisting of a vector of input samples.
     * @param verbose Whether to print the clipping values or not.
* @return The corrected map associating each provided node to its clipped range. */ - std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); + std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); } -#endif /* AIDGE_QUANTIZATION_PTQ_CLIP_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 52d83d60b9a6f31ca99204daf213b3abd28fae3c..4fc38bc3b959ec8264ddaddbd4673fbe1f75e4ab 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -9,16 +9,19 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_PTQ_H_ -#define AIDGE_QUANTIZATION_PTQ_PTQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ -//#include <cstdint> -//#include <map> -//#include <memory> -//#include <string> -//#include <vector> +#include <cstdint> // std::uint8_t +#include <map> +#include <memory> +#include <set> +#include <string> +#include <utility> // std::pair +#include <vector> #include "aidge/data/Tensor.hpp" +#include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/graph/GraphView.hpp" namespace Aidge { @@ -31,7 +34,7 @@ namespace Aidge { /** * @brief Set of the types of the nodes which does not affect the PTQ process */ - static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); + static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather", "Resize"}); /** * @brief Set of the types of the nodes that merge multiple branches into one @@ -98,18 +101,18 @@ namespace Aidge { * @param scalingNodesOnly Whether to restrain the retreival of the ranges to scaling nodes only or not. * @return A map associating each affine node name to it's corresponding output range. */ - std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); + std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); /** * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range. * This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors. * @param graphView The GraphView containing the affine nodes. - * @param valueRanges The node output value ranges computed over the calibration dataset. + * @param valueRanges The node output value ranges computed over the calibration dataset. 
*/ - void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges); + void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges); /** - * @brief For each node, compute the sign of its input and output values. + * @brief For each node, compute the sign of its input and output values. * The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible. * @param graphView The GraphView to analyze. * @param verbose Whether to print the sign map or not. @@ -135,7 +138,7 @@ namespace Aidge { * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'. * @param applyRounding Whether to apply the rounding operations or not. * @param optimizeSigns Whether to take account of the IO signs of the operators or not. - * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. + * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. * @param verbose Whether to print internal informations about the quantization process. */ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose); @@ -145,7 +148,7 @@ namespace Aidge { * @param graphView The GraphView containing the affine nodes. * @return A map associating each affine node name to it's corresponding weight range. */ - std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView); + std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView); /** * @brief Clear the affine nodes biases. Provided form debugging purposes. @@ -157,8 +160,8 @@ namespace Aidge { * @brief Developement and test routine. * @param graphView The GraphView under test. */ - void devPTQ(std::shared_ptr<GraphView> graphView); + void devPTQ(std::shared_ptr<GraphView> graphView); } -#endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index c4f2ac7262257ff5384302e3929700d8ed8e4fca..b9bad0d18f099e94d4c52254b08629c7f947db6a 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -8,22 +8,16 @@ * SPDX-License-Identifier: EPL-2.0 * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#define AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#include <array> #include <memory> #include <string> -#include <utility> - -#include "aidge/operator/Clip.hpp" -#include "aidge/operator/Mul.hpp" -#include "aidge/operator/Round.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/Node.hpp" -#include "aidge/graph/OpArgs.hpp" // Sequential -#include "aidge/operator/MetaOperator.hpp" + +namespace Aidge { /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. 
@@ -33,7 +27,7 @@ /// @param clip_max The maximum value for the clip operation. /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the meta-operator node. -std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name); +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. /// Therefore, this meta-operator consists solely of a [Mul] operation. @@ -41,7 +35,7 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name = ""); +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = ""); /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. @@ -50,25 +44,27 @@ std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated. /// @param newScalingFactor The new scaling factor to apply to the meta-operator node. /// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible). -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor); +void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. -/// This function returns the scaling factor associated with the specified PTQ meta-operator node, +/// This function returns the scaling factor associated with the specified PTQ meta-operator node, /// allowing inspection of the current scalar applied in the [Mul] operation. /// /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried. /// @return The scaling factor currently applied to the meta-operator node, or -1 if the operation fails (e.g., if MetaOpNode is null or incompatible). -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); +double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @brief Sets the clip range for an existing Quantizer node by specifying minimum and maximum clipping values. /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped /// in the [Clip] operation of the Quantizer sequence. /// -/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. +/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. /// This node should have been created using the Quantizer function. /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. 
Values above this will be clipped to this maximum. /// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null). -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, float min, float max); +void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); + +} -#endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp index ecbe7422ea85db1771d91e161c93740993ebbe2b..6a2aa249892d58fcbd5a45a8d7bb8de67effabaf 100644 --- a/include/aidge/quantization/QAT/QAT_FixedQ.hpp +++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp @@ -9,8 +9,10 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ -#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ + +#include <memory> #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" @@ -41,10 +43,10 @@ void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits * @brief Developement and test routine. * @param graphView The GraphView under test. */ -void devQAT(std::shared_ptr<GraphView> graphView); +void devQAT(std::shared_ptr<GraphView> graphView); } } -#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 4970be07fae8737a1c2863600757bb81ff3a65f9..a44c71b04ca9e9c6a8fba27c615c99b4893d3d8c 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -9,12 +9,14 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_ -#define AIDGE_QUANTIZATION_QAT_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ + +#include <cstddef> // std::size_t +#include <memory> -#include "aidge/graph/Node.hpp" -#include "aidge/graph/GraphView.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" namespace Aidge { namespace QuantLSQ { @@ -25,7 +27,7 @@ namespace QuantLSQ { * @param nbBits Number of quantization bits. * @param span Fixed output span of the quantizers. */ -void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); +void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size); /** * @brief Given a GraphView with parameters properly initialized and some calibration data, @@ -35,10 +37,10 @@ void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float * @param calibrationData Calibration data used to adjust the spans. * @param scale Multiplicative constant applied to the spans. 
 */
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData);
 
-}
-}
+} // namespace QuantLSQ
+} // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ */
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
new file mode 100644
index 0000000000000000000000000000000000000000..546263af3a7e8b7a73991173f48d0b095c7d9501
--- /dev/null
+++ b/include/aidge/quantization_version.h
@@ -0,0 +1,11 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+namespace Aidge {
+static constexpr const int PROJECT_VERSION_MAJOR = 0;
+static constexpr const int PROJECT_VERSION_MINOR = 2;
+static constexpr const int PROJECT_VERSION_PATCH = 0;
+static constexpr const char * PROJECT_VERSION = "0.2.0";
+static constexpr const char * PROJECT_GIT_HASH = "f50c860";
+}
+#endif // VERSION_H
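Once the Python bindings are built, the constants generated above can be checked at runtime. A minimal sketch, assuming the wheel is installed (the three helpers are bound in python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp later in this diff):

    import aidge_quantization

    print(aidge_quantization.get_project_version())  # "0.2.0"
    print(aidge_quantization.get_git_hash())         # "f50c860"
    aidge_quantization.show_version()                # logs version, git hash and compiler info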
diff --git a/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6b4deb81aa77924a813f0704221620e5f0f7fe04
--- /dev/null
+++ b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp
@@ -0,0 +1,38 @@
+#ifndef AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
+#define AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
+
+#include "aidge/utils/Log.hpp"
+#include "aidge/quantization_version.h"
+
+namespace Aidge {
+
+constexpr inline const char * getQuantizationProjectVersion(){
+    return PROJECT_VERSION;
+}
+
+constexpr inline const char * getQuantizationGitHash(){
+    return PROJECT_GIT_HASH;
+}
+
+void showQuantizationVersion() {
+    Log::info("Aidge quantization: {} ({}), {} {}", getQuantizationProjectVersion(), getQuantizationGitHash(), __DATE__, __TIME__);
+    // Compiler version
+    #if defined(__clang__)
+    /* Clang/LLVM. ---------------------------------------------- */
+    Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__);
+    #elif defined(__ICC) || defined(__INTEL_COMPILER)
+    /* Intel ICC/ICPC. ------------------------------------------ */
+    Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER);
+    #elif defined(__GNUC__) || defined(__GNUG__)
+    /* GNU GCC/G++. --------------------------------------------- */
+    Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
+    #elif defined(_MSC_VER)
+    /* Microsoft Visual Studio. --------------------------------- */
+    Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER);
+    #else
+    Log::info("Unknown compiler\n");
+    #endif
+
+}
+} // namespace Aidge
+#endif // AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
diff --git a/include/aidge/version.h.in b/include/aidge/version.h.in
new file mode 100644
index 0000000000000000000000000000000000000000..4b876f63002972c1f8f1340b70cdecdace911012
--- /dev/null
+++ b/include/aidge/version.h.in
@@ -0,0 +1,11 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+namespace Aidge {
+static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@;
+static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@;
+static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@;
+static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@";
+static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@";
+}
+#endif // VERSION_H
diff --git a/pyproject.toml b/pyproject.toml
index fc745eb1b584a097e17dddbdd6feeb1565ae3b96..088200e44f589e221982ddaab825986c4224243d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,25 +4,47 @@ description="Quantization algorithms to compress aidge networks."
 dependencies = [
     "numpy>=1.21.6",
 ]
-requires-python = ">= 3.7"
+requires-python = ">= 3.8"
 readme = "README.md"
 license = { file = "LICENSE" }
 classifiers = [
     "Development Status :: 2 - Pre-Alpha",
-    "Programming Language :: Python :: 3"
-    ]
-dynamic = ["version"] # defined in tool.setuptools_scm
-# version="1"
+    "Intended Audience :: Developers",
+    "Intended Audience :: Education",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)",
+    "Programming Language :: C++",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3 :: Only",
+    "Topic :: Scientific/Engineering",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development"
+]
+dynamic = ["version"] # defined by pbr
 
 [build-system]
 requires = [
     "setuptools>=64",
-    "setuptools_scm[toml]==7.1.0",
     "cmake>=3.15.3.post1",
-    "toml"
+    "toml",
+    "pbr"
 ]
 build-backend = "setuptools.build_meta"
 
+[project.urls]
+Homepage = "https://www.deepgreen.ai/en/platform"
+Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/"
+Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization"
+Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/issues/"
+Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/releases"
+
 #####################################################
 # SETUPTOOLS
 [tool.setuptools]
@@ -35,10 +57,6 @@ exclude = [
     "aidge_quantization.unit_tests.assets"
 ]  # exclude packages matching these glob patterns (empty by default)
 
-# SETUPTOOLS_SCM
-[tool.setuptools_scm]
-write_to = "aidge_quantization/_version.py"
-
 #####################################################
 # CIBUILDWHEEL
 [tool.cibuildwheel]
@@ -54,6 +72,19 @@ test-command = "pytest {package}/aidge_quantization/unit_tests"
 #     "cp39-win_amd64",
 #     "cp310-win_amd64",
 # ]
+# PYLINT
+[tool.pylint.main]
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] +# Files or directories to be skipped. They should be base names, not paths. +ignore = ["CVS"] +# List of module names for which member attributes should not be checked (useful +# for modules/projects where namespaces are manipulated during runtime and thus +# existing member attributes cannot be deduced by static analysis). It supports +# qualified module names, as well as Unix pattern matching. +ignored-modules = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] ## AIDGE DEPENDENCIES DECLARATION [tool.cibuildwheel.environment] AIDGE_DEPENDENCIES = "aidge_core aidge_backend_cpu aidge_onnx" # format => "dep_1 dep_2 ... dep_n" diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index ed2632566f4535e89d1e78314256f4c9b2b84623..b5193bddcfe345a1702f02fcc139a4cf5b94a1ce 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -220,7 +220,7 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network fo the PTQ"); + m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ"); } diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp index cd18cf8ebdd165f85284e397fe75e2a3eaf988bc..7ac344dcfcd4fc93e3bba1dcd19c1413f5a29d0c 100644 --- a/python_binding/pybind_Quantization.cpp +++ b/python_binding/pybind_Quantization.cpp @@ -20,7 +20,7 @@ namespace py = pybind11; -namespace Aidge +namespace Aidge { // operators @@ -35,8 +35,9 @@ void init_QAT_FixedQ(py::module &m); void init_QAT_LSQ(py::module &m); void init_QuantRecipes(py::module &m); +void init_QuantizationVersionInfo(py::module &m); -PYBIND11_MODULE(aidge_quantization, m) +PYBIND11_MODULE(aidge_quantization, m) { init_FixedQ(m); init_LSQ(m); @@ -47,6 +48,7 @@ PYBIND11_MODULE(aidge_quantization, m) init_QAT_FixedQ(m); init_QAT_LSQ(m); init_QuantRecipes(m); + init_QuantizationVersionInfo(m); } } // namespace Aidge diff --git a/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..abed12b38df75471ddb57c505146ab12bc833ed8 --- /dev/null +++ b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp @@ -0,0 +1,11 @@ +#include <pybind11/pybind11.h> +#include "aidge/utils/sys_info/QuantizationVersionInfo.hpp" + +namespace py = pybind11; +namespace Aidge { +void init_QuantizationVersionInfo(py::module& m){ + m.def("show_version", &showQuantizationVersion); + m.def("get_project_version", &getQuantizationProjectVersion); + m.def("get_git_hash", &getQuantizationGitHash); +} +} diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..aa0f227f6688468a5ab93384f7b1670086000035 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +# pbr file +[metadata] +version = file: version.txt diff --git a/setup.py b/setup.py index 8774d01a4abd69c76319c71b610e98061153d4c2..1bfc0ac515fd8cceeec4cba666addc1e7666fd25 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ class AidgePkgBuild(build_ext): # This lists the number of processors available on the machine # The compilation will use half of them max_jobs = 
str(ceil(multiprocessing.cpu_count() / 2))
+        max_jobs = os.environ.get("AIDGE_NB_PROC", max_jobs)
 
         cwd = pathlib.Path().absolute()
 
@@ -51,14 +52,20 @@ class AidgePkgBuild(build_ext):
         package_prefix = build_lib if not self.editable_mode else SETUP_DIR
         pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute()
 
-        os.chdir(str(build_temp))
-
-        compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release")
         install_path = (
             os.path.join(sys.prefix, "lib", "libAidge")
             if "AIDGE_INSTALL" not in os.environ
             else os.environ["AIDGE_INSTALL"]
         )
+
+        # Read environment variables for CMake options
+        c_compiler = os.environ.get("AIDGE_C_COMPILER", "gcc")
+        cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++")
+        build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release")
+        asan = os.environ.get("AIDGE_ASAN", "OFF")
+        with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF")
+        cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "")
+
         build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
         build_gen_opts = (
             ["-G", build_gen]
@@ -67,26 +74,36 @@ class AidgePkgBuild(build_ext):
         )
         test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF")
 
-        self.spawn(
-            [
-                "cmake",
-                *build_gen_opts,
-                str(cwd),
-                f"-DTEST={test_onoff}",
-                f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
-                f"-DCMAKE_BUILD_TYPE={compile_type}",
-                "-DPYBIND=ON",
-                f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
-                "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
-                "-DCOVERAGE=OFF",
-            ]
-        )
+        os.chdir(str(build_temp))
+
+        cmake_cmd = [
+            "cmake",
+            *build_gen_opts,
+            str(cwd),
+            f"-DTEST={test_onoff}",
+            f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
+            f"-DCMAKE_BUILD_TYPE={build_type}",
+            f"-DCMAKE_C_COMPILER={c_compiler}",
+            f"-DCMAKE_CXX_COMPILER={cxx_compiler}",
+            f"-DENABLE_ASAN={asan}",
+            f"-DCUDA={with_cuda}",
+            "-DPYBIND=ON",
+            f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
+            "-DCMAKE_EXPORT_COMPILE_COMMANDS=1",
+            "-DCOVERAGE=OFF",
+        ]
+
+        # Append architecture-specific arguments if provided
+        if cmake_arch:
+            cmake_cmd.append(cmake_arch)
+
+        self.spawn(cmake_cmd)
 
         if not self.dry_run:
             self.spawn(
-                ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs]
+                ["cmake", "--build", ".", "--config", build_type, "-j", max_jobs]
             )
-            self.spawn(["cmake", "--install", ".", "--config", compile_type])
+            self.spawn(["cmake", "--install", ".", "--config", build_type])
         os.chdir(str(cwd))
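The AIDGE_* environment variables read above make the CMake configuration tunable without editing setup.py. A hypothetical invocation (the variable names are the ones read by setup.py; the values are only examples):

    import os
    import subprocess

    os.environ["AIDGE_NB_PROC"] = "8"          # cap the number of parallel compile jobs
    os.environ["AIDGE_BUILD_TYPE"] = "Debug"   # forwarded as -DCMAKE_BUILD_TYPE
    os.environ["AIDGE_WITH_CUDA"] = "OFF"      # forwarded as -DCUDA
    subprocess.run(["pip", "install", ".", "-v"], check=True)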
rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] *= scaling; } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -57,20 +62,19 @@ static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) return maxValue; } -void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta) +void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); // Check if the CLE can be applied ... - for (std::shared_ptr<Node> node : nodeVector) if (node->getChildren().size() > 1) { - Log::info(" Network have multiple branches, skipping the CLE ... "); + Log::notice("Network has multiple branches, skipping the CLE ... "); return; - } + } - Log::info(" Applying the Cross-Layer Equalization ... "); + Log::info("Applying the Cross-Layer Equalization ... "); // Get the vector of affine nodes @@ -79,38 +83,46 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe if (isAffine(node)) affineNodeVector.push_back(node); - float maxRangeDelta; + if (affineNodeVector.empty()) { + Log::notice("No affine nodes found in the network. CLE cannot be applied."); + return; + } + double maxRangeDelta; + int iteration = 0; - do + do { + ++iteration; maxRangeDelta = 0.0; - //std::cout << " ----- " << std::endl; //for (std::shared_ptr<Node> node : affineNodeVector) // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - - for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) + + for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - float r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - float r2 = getTensorAbsoluteMax(getWeightTensor(n2)); + double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); + double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); - float s1 = std::sqrt(r1 * r2) / r1; - float s2 = std::sqrt(r1 * r2) / r2; + double s1 = std::sqrt(r1 * r2) / r1; + double s2 = std::sqrt(r1 * r2) / r2; rescaleTensor(getWeightTensor(n1), s1); rescaleTensor(getWeightTensor(n2), s2); rescaleTensor(getBiasTensor(n1), s1); - float rangeDelta = std::abs(r1 - r2); + double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) maxRangeDelta = rangeDelta; } } while (maxRangeDelta > targetDelta); + + Log::notice("CLE completed after {} iterations. 
Final max range delta: {:.6f}", + iteration, maxRangeDelta); } } \ No newline at end of file diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index e00140800e25b210aeabd27a877a6fdfa08708f9..57ad7a836bbb6251a8eeb6da87e3647b4f54afe2 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -19,7 +19,7 @@ namespace Aidge { -std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) +std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) { if (useCuda) graphView->setBackend("cuda"); @@ -72,7 +72,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, bool isInsideRanges = (valueRanges.find(node->name()) != valueRanges.end()); if (isInsideRanges) { - float valueRange = valueRanges[node->name()]; + double valueRange = valueRanges[node->name()]; std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -80,15 +80,17 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, if (useCuda) valueTensor->setBackend("cpu"); - float * castedTensor = static_cast<float *> (valueTensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (valueTensor->getImpl()->rawPtr()); std::vector<int> nodeHistogram = histograms[node->name()]; for(std::size_t i = 0; i < valueTensor->size(); i++) { - int bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + std::size_t bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + bin = std::min(bin, nodeHistogram.size() - 1); nodeHistogram[bin]++; } - histograms[node->name()] = nodeHistogram; + + histograms[node->name()] = nodeHistogram; if (useCuda) valueTensor->setBackend("cuda"); @@ -105,52 +107,53 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, return histograms; } -float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent) +double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent) { int nbBins = histogram.size(); int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { // Compute the rounding cost of this particular clipping ... 
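+ // For each candidate clipping value (it / nbIter), the loop below scales, rounds and clips every bin midpoint, accumulating |clipped - value|^exponent weighted by the bin population.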
- float accumulatedError = 0.0; - float clipping = it / static_cast<float> (nbIter); + double accumulatedError = 0.0; + double clipping = it / static_cast<double> (nbIter); for (int bin = 0; bin < nbBins; bin++) { - float value = (bin + 0.5) / nbBins; - float scaling = signedMax / clipping; - float rounded = std::round(value * scaling) / scaling; - float clipped = std::min(clipping, rounded); + double value = (bin + 0.5) / nbBins; + double scaling = signedMax / clipping; + double rounded = std::round(value * scaling) / scaling; + double clipped = std::min(clipping, rounded); - float approxError = std::abs(clipped - value); + double approxError = std::abs(clipped - value); accumulatedError += std::pow(approxError, exponent) * histogram[bin]; } clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); - + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = static_cast<double> (bestBin) / static_cast<double> (nbIter); + return bestClipping; } -float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) +double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) { // KL Clipping int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - float refNorm = 0; + double refNorm = 0; for (int n : refHistogram) - refNorm += static_cast<float> (n); + refNorm += static_cast<double> (n); - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { - float clipping = it / static_cast<float> (nbIter); + double clipping = it / static_cast<double> (nbIter); // Create the histogram for this particular clipping ... 
@@ -160,7 +163,7 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); quantBin = std::min(quantBin, signedMax-1); quantHistogram[quantBin] += refHistogram[refBin]; @@ -168,10 +171,10 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the mass of the histogram - float quantNorm = 0; + double quantNorm = 0; for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); if (quantBin < static_cast<int> (quantHistogram.size())) quantNorm += quantHistogram[quantBin]; @@ -179,15 +182,15 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the KL divergence - float accumulatedError = 0.0; + double accumulatedError = 0.0; for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); - float p = static_cast<float> (refHistogram[refBin]) / refNorm; - float q = (quantBin < static_cast<int> (quantHistogram.size())) ? - static_cast<float> (quantHistogram[quantBin]) / quantNorm : 0; + double p = static_cast<double> (refHistogram[refBin]) / refNorm; + double q = (quantBin < static_cast<int> (quantHistogram.size())) ? + static_cast<double> (quantHistogram[quantBin]) / quantNorm : 0; if (p != 0 && q != 0) accumulatedError += q * std::log(q / p); @@ -196,16 +199,17 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = (static_cast<double> (bestBin)) / static_cast<double> (nbIter); return bestClipping; } -std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) +std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) { - float clipping = 1.0f; + double clipping = 1.0; int nbBins = (1 << (nbBits + 4)) ; // XXX Enhance this !!! 
@@ -213,6 +217,7 @@ std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::s { if (verbose) Log::info(" === CLIPPING VALUES === "); + std::map<std::string, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, inputDataSet, useCuda); for (std::shared_ptr<Node> node : graphView->getNodes()) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 76fe8f24b3492a9488f808d54c2ce6b491a9b79a..0e26313475bbbda23a56dcdda52d55a0a5af8204 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -66,20 +66,20 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, float value) +static void fillTensor(std::shared_ptr<Tensor> tensor, double value) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Fill the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = value; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) @@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -187,6 +187,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) bool containsBatchNorm = false; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") { @@ -200,6 +201,12 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) popSoftMax(graphView); } +// TODO : enhance this by modifying OperatorImpl in "core" ... +static DataType getDataType(std::shared_ptr<Node> node) +{ + auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator()); + return op->getOutput(0)->dataType(); +} // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!! void insertResidualNodes(std::shared_ptr<GraphView> graphView) @@ -217,6 +224,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Node> parentNode = node->getParent(i); bool parentIsForking = (parentNode->getChildren().size() > 1); + if (parentIsForking) { // temporary verbose ... 
@@ -224,8 +232,9 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Scaling(1.0,residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float32); + std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); + + residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -255,7 +264,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float32); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -283,7 +293,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) else { // Log::info(" last node reached ! "); - graphView->addChild(scalingNode); + parentNode->addChild(scalingNode, 0, 0); + graphView->add(scalingNode); } } } @@ -322,7 +333,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> accumulatedRatios; + std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) { accumulatedRatios.insert(std::make_pair(node->name(), 1.0)); @@ -349,8 +360,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float scaling = getTensorAbsoluteMax(weightTensor); - float ratio = 1.0 / scaling; + double scaling = getTensorAbsoluteMax(weightTensor); + double ratio = 1.0 / scaling; rescaleTensor(weightTensor, ratio); // Accumulate the ratio @@ -378,10 +389,10 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max ratio ... - float maxRatio = 0; + double maxRatio = 0; for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float merginNodeRatio = accumulatedRatios[mergingNode->name()]; + double merginNodeRatio = accumulatedRatios[mergingNode->name()]; if (merginNodeRatio > maxRatio) maxRatio = merginNodeRatio; } @@ -391,13 +402,14 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) // Rescale the previous scaling Nodes for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeRatio = accumulatedRatios[mergingNode->name()]; - float rescaling = mergingNodeRatio / maxRatio; + double mergingNodeRatio = accumulatedRatios[mergingNode->name()]; + double rescaling = mergingNodeRatio / maxRatio; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor / rescaling); + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor / rescaling); + accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... 
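+ // (the update above only matters if this branch feeds yet another merging node downstream)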
} } @@ -405,9 +417,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) } // XXX TODO : take care of the CUDA backend for this too !!! -std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); @@ -425,7 +437,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... valueRanges.insert(std::make_pair(node->name(), range)); @@ -435,9 +447,9 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -467,7 +479,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, // Gather the sample ranges ... - std::map<std::string, float> sampleRanges; + std::map<std::string, double> sampleRanges; for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -478,7 +490,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, if (useCuda) valueTensor->setBackend("cpu"); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... sampleRanges.insert(std::make_pair(node->name(), range)); @@ -510,7 +522,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges) +void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) { std::shared_ptr<Node> firstNode = getFirstNode(graphView); @@ -518,7 +530,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> scalingFactors; + std::map<std::string, double> scalingFactors; for (std::shared_ptr<Node> node : nodeVector) scalingFactors.insert(std::make_pair(node->name(), 1.0)); @@ -549,13 +561,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st { // retrieve the previous scaling factor ... 
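+ // The node's factor is divided by its observed range, normalized by the range already absorbed upstream, which brings every activation range to 1.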
std::shared_ptr<Node> prevNode = node->getParent(0); - float prevScalingFactor = scalingFactors[prevNode->name()]; + double prevScalingFactor = scalingFactors[prevNode->name()]; // ValueRanges must contains all the scaling nodes !!! - float scalingFactor = valueRanges[node->name()]; + double scalingFactor = valueRanges[node->name()]; - float scaling_factor = getScalingFactor(node); - updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor)); + double currScalingFactor = getScalingFactor(node); + updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -579,10 +591,10 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max scaling ... - float maxScaling = 0; + double maxScaling = 0; for (std::size_t i = 0; i < mergingNodes.size(); i++) { - float merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; + double merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; if (merginNodeScaling > maxScaling) { maxScaling = merginNodeScaling; } @@ -592,13 +604,14 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeScaling = scalingFactors[mergingNode->name()]; - float rescaling = mergingNodeScaling / maxScaling; + double mergingNodeScaling = scalingFactors[mergingNode->name()]; + double rescaling = mergingNodeScaling / maxScaling; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } } } @@ -699,8 +712,12 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap { // Thoses nodes always have a single parent std::shared_ptr<Node> parent = node->getParent(0); - signMap[node->name()].first = signMap[parent->name()].second; - signMap[node->name()].second = signMap[node->name()].first; + if (parent) + { + signMap[node->name()].first = signMap[parent->name()].second; + signMap[node->name()].second = signMap[node->name()].first; + } + } } @@ -735,8 +752,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ AIDGE_THROW_OR_ABORT(std::runtime_error,"Signs optimization can not be applied if network is not fully quantized ..."); } - float signedMax = (1 << (nbBits - 1)) - 1; - float unsignedMax = (1 << nbBits) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; + double unsignedMax = (1 << nbBits) - 1; std::map<std::string, std::pair<bool, bool>> signMap; @@ -771,7 +788,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; - float rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; + double rescaling = inputIsUnsigned ? 
unsignedMax * signedMax : signedMax * signedMax; std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); @@ -783,7 +800,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Compensate the rescaling using the next Scaling node - float rescaling = 1.0 / signedMax; + double rescaling = 1.0 / signedMax; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -792,13 +809,14 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling *= outputIsUnsigned ? unsignedMax : signedMax; std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } if (isMerging(node)) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -808,9 +826,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor * rescaling); + double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } // Handle the Scaling Nodes ... @@ -819,18 +836,17 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ { if (!noQuant) { - //[!!] replacement of Scaling Node by Quantizer - float currentSF = getScalingFactor(node); + // Replace the Scaling Node by Quantizer - std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, - (signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float32); + std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name()); + quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); - graphView->replace({node}, {quantizerNode}); + graphView->replace({node}, {quantizerNode}); if (optimizeSigns) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -838,8 +854,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax; - float scalingFactor = getScalingFactor(quantizerNode); - updateScalingFactor(quantizerNode,scalingFactor * rescaling); + double currScalingFactor = getScalingFactor(quantizerNode); + updateScalingFactor(quantizerNode, currScalingFactor * rescaling); if(outputIsUnsigned) { @@ -854,7 +870,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits) { // XXX Use the signMap to increase the resolution when possible ... 
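+ // The inserted Mul multiplies the data by signedMax while the node's scaling factor is divided by the same amount : the output is unchanged, but the coefficient tensor can later absorb the non power-of-two part of the scaling in the single-shift pass.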
- float signedMax = (1 << (nbBits - 1)) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -874,7 +890,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float32); + + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -882,10 +899,11 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // create and insert the producer node std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0)); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(); - coeffTensor->setDataType(DataType::Float32); - coeffTensor->setBackend("cpu"); + + coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode) + coeffTensor->setBackend("cpu"); + coeffTensor->resize(inputTensor->dims()); fillTensor(coeffTensor, 1); @@ -896,8 +914,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // rescale the coeffs and edit scaling factor fillTensor(coeffTensor, signedMax); - float sf = getScalingFactor(node); - updateScalingFactor(node,sf/signedMax); + + double currScalingFactor = getScalingFactor(node); // XXX bad naming ! + updateScalingFactor(node, currScalingFactor / signedMax); // TODO : double check this !!! //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; @@ -906,26 +925,24 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u } } -void - -performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) +void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { - //Use A meatoperator of type Scaling of MulCompensation instead + // Use a metaoperator of type Scaling or MulCompensation instead if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); - float base = getScalingFactor(scalingNode); + double base = getScalingFactor(scalingNode); - float approx = std::pow(2, std::ceil(std::log2(base))); + double approx = std::pow(2, std::ceil(std::log2(base))); updateScalingFactor(scalingNode,approx); - float ratio = base / approx; + double ratio = base / approx; std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); rescaleTensor(weightTensor, ratio); @@ -947,19 +964,49 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) { Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) - if (node->type() == "Scaling") + if (node->type() == "Scaling" || node->type() == "Quantizer") { - float factor = getScalingFactor(node); - Log::info(" {:.6f} ({})", factor, node->name()); + double scalingFactor = getScalingFactor(node); + Log::info(" {:.6f} ({})", scalingFactor, node->name()); } } +static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) +{ + graphView->setDataType(dataType); + + for (auto inputNode : graphView->inputNodes()) { + auto op = 
std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); + auto inputTensor = op->getInput(0); + if (inputTensor) + inputTensor->setDataType(dataType); + } + + for (auto tensor : inputDataSet) + tensor->setDataType(dataType); +} + +static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) +{ + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.generateScheduling(); + + auto scheduling = scheduler.getStaticScheduling(); + for (auto node : scheduling) + if (node->type() == "Scaling") + fmt::println("{} range = {}", node->name(), valueRanges[node->name()]); +} + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); graphView->setBackend("cpu"); + DataType initialDataType = (inputDataSet[0])->dataType(); + setupDataType(graphView, inputDataSet, DataType::Float64); + if (!checkArchitecture(graphView)) return; @@ -975,11 +1022,17 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, normalizeParameters(graphView); Log::info(" Computing the value ranges ..."); - std::map<std::string, float> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + + //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); + //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); + Log::info(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); @@ -992,32 +1045,39 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, insertCompensationNodes(graphView, nbBits); Log::info(" Performing the Single-Shift approximation ..."); - performSingleShiftApproximation(graphView,noQuant); + performSingleShiftApproximation(graphView, noQuant); } - + if (verbose) printScalingFactors(graphView); - Log::info(" Resetting the scheduler ..."); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); + //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //printScalingFactors(graphView); + setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); + + Log::info(" Resetting the scheduler ..."); + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + Log::info(" Network is quantized !"); } -std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView) +std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView) { - std::map<std::string, float> weightRanges; + std::map<std::string, double> weightRanges; for (std::shared_ptr<Node> node : graphView->getNodes()) { if (isAffine(node)) { std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float range = getTensorAbsoluteMax(weightTensor); + double range = getTensorAbsoluteMax(weightTensor); weightRanges.insert(std::make_pair(node->name(), range)); } } @@ -1038,7 +1098,7 @@ void 
clearBiases(std::shared_ptr<GraphView> graphView) void devPTQ(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> node : graphView->getNodes()) - std::cout << " UUU : " << node->name() << std::endl; + fmt::println(" UUU : {}", node->name()); } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 89590cbe6aeaf036a50902d5b63c1b46044d2c7f..77018c23aee2f1ef6f430389393fd35e97baa0f6 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -11,8 +11,8 @@ #include "aidge/quantization/PTQ/PTQMetaOps.hpp" -#include <array> #include <memory> +#include <string> #include <utility> //Operator @@ -28,100 +28,125 @@ #include "aidge/utils/Types.h" #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" -std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name) +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/Log.hpp" + + +namespace Aidge +{ + +std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_MulQuant" : ""); - - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor ->getOperator()->setOutput(0,ScalingFactorTensorAttached); - - std::shared_ptr<Aidge::Node> clip_node = Aidge::Clip((!name.empty()) ? name + "_ClipQuant" : "",clip_min,clip_max); - - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({ - mul_node, - Aidge::Round((!name.empty()) ? name + "_RoundQuant" : ""), - clip_node}); - - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - std::shared_ptr<Aidge::Node> metaopNode = MetaOperator("Quantizer",connectedGV,{},name); - return metaopNode; + // create the nodes + + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulQuant" : ""); + std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : ""); + std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_ClipQuant" : "", clipMin, clipMax); + + // connect the scaling factor producer + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + // create the metaop graph + + std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? + + // return the metaop + + std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype + + return metaopNode; } -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name) +std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); - - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? 
name + "_Scaling" : ""); - - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor->getOperator()->setOutput(0, ScalingFactorTensorAttached); - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({mul_node}); - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - Aidge::NodePtr metaopNode = MetaOperator("Scaling",connectedGV,{},name); + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : ""); + + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + std::shared_ptr<GraphView> graphView = Sequential({mulNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); + + NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); + return metaopNode; } -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor) +static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type()); - } - std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{newScalingFactor}); - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - node->input(1).first->getOperator()->setOutput(0, newScalingFactorTensorAttached); - return true; - } - } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return false; + std::shared_ptr<Node> mulNode = nullptr; + for(std::shared_ptr<Node> node : graphView->getNodes()) + if (node->type() == nodeType) + mulNode = node; + + return mulNode; } -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) + +void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use getPTQMetaOpsScalingFactor on Node of type {}",MetaOpNode->type()); - return -1; + if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type()); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) + Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! 
"); + + mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); +} + +double getScalingFactor(std::shared_ptr<Node> MetaOpNode) +{ + if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); + return 0; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - std::shared_ptr<Aidge::Data> MulInput1Data = node->input(1).first->getOperator()->getRawOutput(0); - void* RawInputScalingFactor = std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr(); - return (*(static_cast<float*>(RawInputScalingFactor))); - } + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) { + Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); + return 0; } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return -1; + + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + + return localTensor.get<double>(0); } -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,float min, float max) + + +void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { - if(QuantizerNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use setQuantizerClipRange on Node of type {}",QuantizerNode->type()); - return false; + if (quantizerNode->type() != "Quantizer") { + Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type()); + return; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(QuantizerNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Node inside - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Clip") - { - std::shared_ptr<Aidge::Clip_Op> Clip_Node_Op = std::static_pointer_cast<Aidge::Clip_Op>(node->getOperator()); - Clip_Node_Op->max() = max; - Clip_Node_Op->min() = min; - return true; - } + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator()); + + std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); + + if (!clipNode) { + Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); + return; } - AIDGE_ASSERT("Invalid MetaOperator Quantizer, no clip node found inside Node of type {}",QuantizerNode->type()); - return false; + + std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator()); + clipOp->max() = max; + clipOp->min() = min; +} } \ No newline at end of file diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp index d22074f15982f9fa0e92cfc4425af32e584db8cd..9160b4ae6add5ae0347e008962956dc90c3a36fd 100644 --- a/src/QAT/QAT_FixedQ.cpp +++ b/src/QAT/QAT_FixedQ.cpp @@ -91,7 +91,7 
@@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float inputStd = getTensorStd(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputStd)); - std::cout << node->name() << " -> " << inputStd << std::endl; + fmt::println("{} -> {}", node->name(), inputStd); } } @@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float paramStd = getTensorStd(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramStd)); - std::cout << node->name() << " -> " << paramStd << std::endl; + fmt::println("{} -> {}", node->name(), paramStd); } } @@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) scheduler.generateScheduling(); auto s = scheduler.getStaticScheduling(); for (std::shared_ptr<Node> node : s) - std::cout << " name : " << node->name() << std::endl; + fmt::println(" name : {}", node->name()); } } \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 38c818214361d007218b1300b73d60b3124e2b7d..9b51e846df498a9303b7373ae1c86d4b007a96f0 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -125,7 +125,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float inputAbsMean = getTensorAbsMean(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - std::cout << node->name() << " -> " << inputAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), inputAbsMean); } } @@ -148,7 +148,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float paramAbsMean = getTensorAbsMean(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - std::cout << node->name() << " -> " << paramAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), paramAbsMean); } } diff --git a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu index 0d5490946af3a4ab172bafc13d9af8c191695b84..96065e41376a1facee8a05260f33a1ce68ceb92a 100644 --- a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu +++ b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu @@ -84,10 +84,11 @@ __global__ void LSQImpl_cuda_backward_kernel_(const std::size_t inputLength, const GI fullPrecScale = input[i] / stepSize[0]; /*****************************Data/Weights Gradient Computation************************/ - // STE method is simply apply: - grad_input[i] = grad_output[i]*( (fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : - (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : - GI(1.0)); + // STE method is simply applied : + // (we accumulate the gradient instead of replacing it) + grad_input[i] += grad_output[i] * ((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale >= static_cast<GI>(range.second)) ? 
GI(0.0) : + GI(1.0)); /*****************************Step Size Gradient Computation*************************/ GI qData = fullPrecScale; @@ -142,7 +143,9 @@ void Aidge::LSQImpl_cuda_backward_kernel(const std::size_t inputLength, // for simplicity and foolproof-ness thrust::device_ptr<GI> grad_workspacePtr(grad_workspace); thrust::device_ptr<GI> grad_stepSizePtr(grad_stepSize); - grad_stepSizePtr[0] = thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); + + // We accumulate the stepSize gradient instead of replacing it + grad_stepSizePtr[0] += thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); //printf(" step grad = %f \n", (float) grad_stepSizePtr[0]); diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp index 879174032bfcf5b2958b0950d0ed7410ba83331c..9828ce98f4918b3d2336c57fe018c9129804cf01 100644 --- a/src/operator/FixedQ.cpp +++ b/src/operator/FixedQ.cpp @@ -20,6 +20,17 @@ const std::string Aidge::FixedQ_Op::Type = "FixedQ"; +Aidge::FixedQ_Op::FixedQ_Op(const Aidge::FixedQ_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl){ + SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } +} + std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const { return Registrar<FixedQ_Op>::getKeys(); } @@ -28,3 +39,12 @@ void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(FixedQ_Op, *this, name); mOutputs[0]->setBackend(name, device); } + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::FixedQ(std::size_t nbBits, + float span, + bool isOutputUnsigned, + const std::string& name) { + return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); +} \ No newline at end of file diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp index b6124bad0e5f04c8e22e2d16c48dd4fe5de7945a..426e330e7f8426d256ca76a843548a91a62b036a 100644 --- a/src/operator/SAT/DoReFa.cpp +++ b/src/operator/SAT/DoReFa.cpp @@ -17,13 +17,38 @@ #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -const std::string Aidge::DoReFa_Op::Type = "DoReFa"; +namespace Aidge { -std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const { +const std::string DoReFa_Op::Type = "DoReFa"; + +DoReFa_Op::DoReFa_Op(const DoReFa_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl) { + SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Operator> DoReFa_Op::clone() const { + return std::make_shared<DoReFa_Op>(*this); +} + +std::set<std::string> DoReFa_Op::getAvailableBackends() const { return Registrar<DoReFa_Op>::getKeys(); } -void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { +void DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(DoReFa_Op, *this, name); mOutputs[0]->setBackend(name, device); -} \ No newline at end of file +} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Node> DoReFa(size_t range, DoReFaMode mode, const std::string& name) { + return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); +} + +} // namespace Aidge \ No newline at end of file diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp index 2b8d63d7136c45589cba92018d2ecafe17d54e4e..a03fc7d3c602c3ff86551da19defe083a5cc6e3a 100644 --- 
a/src/operator/SAT/TanhClamp.cpp +++ b/src/operator/SAT/TanhClamp.cpp @@ -20,6 +20,20 @@ const std::string Aidge::TanhClamp_Op::Type = "TanhClamp"; +Aidge::TanhClamp_Op::TanhClamp_Op(const Aidge::TanhClamp_Op& op) + : OperatorTensor(op) +{ + if (op.mImpl) { + SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Aidge::Operator> Aidge::TanhClamp_Op::clone() const { + return std::make_shared<TanhClamp_Op>(*this); +} + bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) { if (inputsAssociated()) { @@ -40,5 +54,11 @@ void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device mOutputs[0]->setBackend(name, device); // Scale output is always on CPU for now - mOutputs[1]->setBackend("cpu"); // XXX why ? + mOutputs[1]->setBackend("cpu"); // XXX why ? +} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::TanhClamp(const std::string& name) { + return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); } \ No newline at end of file diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 562948ca6d145278b1b6e564af5a13f3271ca3b4..6e1dcdb1b64c0a1e94c74ce66cb71f1a458bca35 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -59,7 +59,7 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator()); int nb_channels = convOperator->getInput(1)->dims()[0]; - std::cout << " NB CHANNELS = " << nb_channels << std::endl; // TODO : remove this ... + fmt::println(" NB CHANNELS = {}", nb_channels); // TODO : remove this ... std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView); std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName); diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 9d9f81516b0cd2611484ee9e3e06e838833200db..cfdbf0ad393cca9daa300cddd807a7667523d2a0 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -1,12 +1,23 @@ -Include(FetchContent) +# Catch2 configuration +set(CATCH2_MIN_VERSION 3.3.0) -FetchContent_Declare( - Catch2 - GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.0.1 # or a later release -) +# Try to find system installed Catch2 +find_package(Catch2 ${CATCH2_MIN_VERSION} QUIET) -FetchContent_MakeAvailable(Catch2) +if(NOT Catch2_FOUND) + message(STATUS "Catch2 not found in system, retrieving from git") + Include(FetchContent) + + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG devel # development branch, used only when no system Catch2 is found + ) + FetchContent_MakeAvailable(Catch2) + message(STATUS "Fetched Catch2 version ${Catch2_VERSION}") +else() + message(STATUS "Using system Catch2 version ${Catch2_VERSION}") +endif() file(GLOB_RECURSE src_files "*.cpp") diff --git a/unit_tests/Test_QuantPTQ.cpp b/unit_tests/Test_QuantPTQ.cpp index 36377e84e27e9de9cc28de323cfba5a44cb80904..e7211ce4092f789c8c6263671ad236b97934ffbb 100644 --- a/unit_tests/Test_QuantPTQ.cpp +++ b/unit_tests/Test_QuantPTQ.cpp @@ -1,21 +1,19 @@ -// #include <catch2/catch_test_macros.hpp> - -// #include "aidge/data/Tensor.hpp" -// #include "aidge/backend/TensorImpl.hpp" -// #include "aidge/backend/cpu.hpp" -// #include "aidge/operator/Conv.hpp" -// #include "aidge/operator/Scaling.hpp" -// #include "aidge/operator/GenericOperator.hpp" -// 
#include "aidge/graph/GraphView.hpp" -// #include "aidge/QuantPTQ.hpp" -// #include "aidge/scheduler/Scheduler.hpp" -// #include "aidge/hook/OutputRange.hpp" -// #include "aidge/operator/Producer.hpp" - -// #include <unordered_map> - -// using namespace Aidge; -// //using namespace Aidge_HELPER; +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +TEST_CASE("[tmp] basic test") { + REQUIRE(true == true); +} // TEST_CASE("[aidge_module_template/ref_cpp/quantization] PTQ : Quantize Graph") { @@ -79,7 +77,7 @@ // std::shared_ptr<Tensor> myInput = // std::make_shared<Tensor>( -// Array4D<float,2,3,5,5> { +// Array4D<float,2,3,5,5> { // { // { // {{ 0., 1., 2., 3., 4.}, @@ -124,7 +122,7 @@ // ); // auto dataProvider = Producer(myInput, "dataProvider"); -// Tensor myOutput = Array4D<float,2,4,3,3> { +// Tensor myOutput = Array4D<float,2,4,3,3> { // { // { // {{ 15226., 15577., 15928.}, @@ -188,9 +186,9 @@ // "%f" // "\n", // max_output_conv); - + // } - + // float max_output_relu = std::static_pointer_cast<OutputRange>(myReLU1->getOperator()->getHook("output_range"))->getOutput(0); // if(verbose) { // printf("[hook] OutputRange(forward) :: ReLU output max: " @@ -222,10 +220,10 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } - +// } + // SequentialScheduler scheduler_v2(g1); - + // scheduler_v2.forward(); // scheduler_v2.generateScheduling(false); // std::vector<std::shared_ptr<Node>> ordered_graph_view_v2 = scheduler_v2.getStaticScheduling(); @@ -242,7 +240,7 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } +// } // } \ No newline at end of file diff --git a/version.txt b/version.txt index 69367fd08f3ce302151ebc9779193d517dfa32de..9e11b32fcaa96816319e5d0dcff9fb2873f04061 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1 @@ -0.3.0 - +0.3.1