Skip to content
Snippets Groups Projects
Commit 1d453e58 authored by Maxence Naud's avatar Maxence Naud
Browse files

Slight optimization of TensorImpl_cpu

parent b5e1d886
No related branches found
No related tags found
1 merge request: !31 "Adding INT64 Tensor support"
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_ #ifndef AIDGE_CPU_DATA_TENSORIMPL_H_
#define AIDGE_CPU_DATA_TENSORIMPL_H_ #define AIDGE_CPU_DATA_TENSORIMPL_H_
...@@ -10,9 +21,10 @@ ...@@ -10,9 +21,10 @@
#include "aidge/utils/future_std/span.hpp" #include "aidge/utils/future_std/span.hpp"
namespace Aidge { namespace Aidge {
template <class T> template <class T>
class TensorImpl_cpu : public TensorImpl { class TensorImpl_cpu : public TensorImpl {
private: private:
const Tensor &mTensor; // Impl needs to access Tensor information, but is not const Tensor &mTensor; // Impl needs to access Tensor information, but is not
// supposed to change it! // supposed to change it!
/// Pointer to the data and its capacity /// Pointer to the data and its capacity
...@@ -20,7 +32,7 @@ class TensorImpl_cpu : public TensorImpl { ...@@ -20,7 +32,7 @@ class TensorImpl_cpu : public TensorImpl {
/// If this instance own the data, std::unique_ptr manages it /// If this instance own the data, std::unique_ptr manages it
std::unique_ptr<T[]> mDataOwner; std::unique_ptr<T[]> mDataOwner;
public: public:
static constexpr const char *Backend = "cpu"; static constexpr const char *Backend = "cpu";
TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
...@@ -31,8 +43,8 @@ class TensorImpl_cpu : public TensorImpl { ...@@ -31,8 +43,8 @@ class TensorImpl_cpu : public TensorImpl {
std::size_t i = 0; std::size_t i = 0;
for (; i < mTensor.size() && for (; i < mTensor.size() &&
mData[i] == typedOtherImpl.data()[i]; *(mData.data()+i) == *static_cast<const T*>(typedOtherImpl.rawPtr(i));
++i) { ++i) {
} }
return i == mTensor.size(); return i == mTensor.size();
} }
...@@ -41,23 +53,20 @@ class TensorImpl_cpu : public TensorImpl { ...@@ -41,23 +53,20 @@ class TensorImpl_cpu : public TensorImpl {
return std::make_unique<TensorImpl_cpu<T>>(tensor); return std::make_unique<TensorImpl_cpu<T>>(tensor);
} }
// native interface inline std::size_t size() const noexcept override final { return mData.size(); }
auto data() const -> decltype(mData.data()) { return mData.data(); } inline std::size_t scalarSize() const noexcept override final { return sizeof(T); }
std::size_t size() const override { return mData.size(); }
std::size_t scalarSize() const override { return sizeof(T); }
void setDevice(DeviceIdx_t device) override { void setDevice(DeviceIdx_t device) override final {
AIDGE_ASSERT(device == 0, "device cannot be != 0 for CPU backend"); AIDGE_ASSERT(device == 0, "device cannot be != 0 for CPU backend");
} }
void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override { void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity"); AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
static_cast<T *>(rawPtr()) + offset); static_cast<T *>(rawPtr()) + offset);
} }
void copyCast(const void *src, NbElts_t length, const DataType srcDt) override { void copyCast(const void *src, NbElts_t length, const DataType srcDt) override final {
if (length == 0) { if (length == 0) {
return; return;
} }
...@@ -101,7 +110,7 @@ class TensorImpl_cpu : public TensorImpl { ...@@ -101,7 +110,7 @@ class TensorImpl_cpu : public TensorImpl {
std::copy(static_cast<const uint16_t*>(src), static_cast<const uint16_t*>(src) + length, std::copy(static_cast<const uint16_t*>(src), static_cast<const uint16_t*>(src) + length,
static_cast<T *>(rawPtr())); static_cast<T *>(rawPtr()));
break; break;
case DataType::Int8: case DataType::Int8:
std::copy(static_cast<const int8_t*>(src), static_cast<const int8_t*>(src) + length, std::copy(static_cast<const int8_t*>(src), static_cast<const int8_t*>(src) + length,
static_cast<T *>(rawPtr())); static_cast<T *>(rawPtr()));
break; break;
...@@ -115,39 +124,39 @@ class TensorImpl_cpu : public TensorImpl { ...@@ -115,39 +124,39 @@ class TensorImpl_cpu : public TensorImpl {
} }
} }
void copyFromDevice(const void *src, NbElts_t length, const std::pair<std::string, DeviceIdx_t>& device) override { void copyFromDevice(const void *src, NbElts_t length, const std::pair<std::string, DeviceIdx_t>& device) override final {
AIDGE_ASSERT(device.first == Backend, "backend must match"); AIDGE_ASSERT(device.first == Backend, "backend must match");
AIDGE_ASSERT(device.second == 0, "device cannot be != 0 for CPU backend"); AIDGE_ASSERT(device.second == 0, "device cannot be != 0 for CPU backend");
copy(src, length); copy(src, length);
} }
void copyFromHost(const void *src, NbElts_t length) override { inline void copyFromHost(const void *src, NbElts_t length) override final {
copy(src, length); copy(src, length);
} }
void copyToHost(void *dst, NbElts_t length) const override { void copyToHost(void *dst, NbElts_t length) const override final {
AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity"); AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
const T* src = static_cast<const T*>(rawPtr()); const T* src = static_cast<const T*>(rawPtr());
std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
static_cast<T *>(dst)); static_cast<T *>(dst));
} }
void *rawPtr(NbElts_t offset = 0) override { void *rawPtr(NbElts_t offset = 0) override final {
lazyInit(); lazyInit();
return (mData.data() + offset); return (mData.data() + offset);
}; };
const void *rawPtr(NbElts_t offset = 0) const override { const void *rawPtr(NbElts_t offset = 0) const override final {
AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const rawPtr"); AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const rawPtr");
return (mData.data() + offset); return (mData.data() + offset);
}; };
void *hostPtr(NbElts_t offset = 0) override { void *hostPtr(NbElts_t offset = 0) override final {
lazyInit(); lazyInit();
return (mData.data() + offset); return (mData.data() + offset);
}; };
const void *hostPtr(NbElts_t offset = 0) const override { const void *hostPtr(NbElts_t offset = 0) const override final {
AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const hostPtr"); AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const hostPtr");
return (mData.data() + offset); return (mData.data() + offset);
}; };
......
0% Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment