diff --git a/include/aidge/backend/TensorImpl.hpp b/include/aidge/backend/TensorImpl.hpp
index a27f0317c59916facef970a3c1b91704fb485cd4..62f13acb3db81954a4fbb753a3e68e1c5a516402 100644
--- a/include/aidge/backend/TensorImpl.hpp
+++ b/include/aidge/backend/TensorImpl.hpp
@@ -67,19 +67,13 @@ private:
 class TensorImpl {
 public:
     TensorImpl() = delete;
-    TensorImpl(const char *backend, DeviceIdx_t device = 0) : mBackend(backend), mDevice(device){};
+    TensorImpl(const char *backend, DeviceIdx_t device, NbElts_t length) : mBackend(backend), mDevice(device), mNbElts(length) {};
 
     /**
      * Return the (backend, device) pair for this implementation.
     */
     std::pair<std::string, DeviceIdx_t> device() const { return std::make_pair(mBackend, mDevice); }
 
-    /**
-     * Set the device ID for current backend.
-     * @param device New device ID on current backend.
-    */
-    virtual void setDevice(DeviceIdx_t device) = 0;
-
     /**
      * Copy data from the same device.
      * @param src Pointer on current implementation device.
@@ -93,30 +87,34 @@ public:
      * @param srcDt Source data type.
      * @param src Pointer on current implementation device.
      * @param length Number of elements to copy.
+     * @param offset Destination offset (in number of elements).
     */
-    virtual void copyCast(const void *src, NbElts_t length, const DataType srcDt) = 0;
+    virtual void copyCast(const void *src, const DataType srcDt, NbElts_t length, NbElts_t offset = 0) = 0;
 
     /**
     * Copy data from another device on the same backend.
      * @param device (backend, device) pair to copy from. The backend must match current implementation backend.
      * @param src Pointer on current implementation backend.
      * @param length Number of elements to copy.
+     * @param offset Destination offset (in number of elements).
     */
-    virtual void copyFromDevice(const void *src, NbElts_t length, const std::pair<std::string, DeviceIdx_t>& device) = 0;
+    virtual void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) = 0;
 
     /**
      * Copy data from host.
      * @param src Host pointer to copy from.
      * @param length Number of elements to copy.
+     * @param offset Destination offset (in number of elements).
     */
-    virtual void copyFromHost(const void *src, NbElts_t length) = 0;
+    virtual void copyFromHost(const void *src, NbElts_t length, NbElts_t offset = 0) = 0;
 
     /**
      * Copy data to host.
     * @param dst Host pointer to copy to.
      * @param length Number of elements to copy.
+     * @param offset Source offset (in number of elements).
     */
-    virtual void copyToHost(void *dst, NbElts_t length) const = 0;
+    virtual void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const = 0;
 
     /**
      * Return the raw device pointer.
@@ -146,8 +144,22 @@ public:
         AIDGE_THROW_OR_ABORT(std::runtime_error, "Cannot set raw pointer for backend %s", mBackend);
     };
 
-    virtual std::size_t size() const = 0; // Storage size
-    virtual std::size_t scalarSize() const = 0; // Size of one scalar (in bytes)
+    /**
+     * Set the size, in number of elements, that must be stored.
+    */
+    void resize(NbElts_t length) {
+        mNbElts = length;
+    }
+
+    /**
+     * Return the number of elements stored.
+    */
+    inline std::size_t size() const noexcept { return mNbElts; }
+
+    /**
+     * Return the size (in bytes) of one element (scalar).
+    */
+    virtual std::size_t scalarSize() const noexcept = 0;
     constexpr const char *backend() const { return mBackend; }
     virtual ~TensorImpl() = default;
     virtual bool operator==(const TensorImpl &othImpl) const = 0;
@@ -156,12 +168,16 @@ public:
      * Copy from another backend.
      * @param srcImpl Source TensorImpl to copy from.
      * @param length Number of elements of size scalarSize() to copy
+     * @param srcOffset Source offset (in number of elements).
+     * @param dstOffset Destination offset (in number of elements).
     */
-    void copyFrom(const TensorImpl& srcImpl, NbElts_t length);
+    void copyFrom(const TensorImpl& srcImpl, NbElts_t length, NbElts_t srcOffset = 0, NbElts_t dstOffset = 0);
 
 protected:
     const char *mBackend;
-    DeviceIdx_t mDevice;
+    const DeviceIdx_t mDevice;
+    /// Number of elements (to be) stored
+    NbElts_t mNbElts;
 };
 
 } // namespace Aidge
diff --git a/include/aidge/data/Tensor.hpp b/include/aidge/data/Tensor.hpp
index 8129a900718169861dc2df4213cd3533d1dfe570..658c0b497d9753f1bdfd42a274dbb48970cb6d6b 100644
--- a/include/aidge/data/Tensor.hpp
+++ b/include/aidge/data/Tensor.hpp
@@ -32,15 +32,18 @@ namespace Aidge {
  * Contains a pointer to an actual contiguous implementation of data.
  */
 class Tensor : public Data,
-               public Registrable<Tensor, std::tuple<std::string, DataType>, std::unique_ptr<TensorImpl>(const Tensor &)> {
+               public Registrable<Tensor, std::tuple<std::string, DataType>, std::shared_ptr<TensorImpl>(DeviceIdx_t device, NbElts_t length)> {
    private:
     DataType mDataType; /** enum to specify data type. */
     std::vector<DimSize_t> mDims; /** Dimensions of the tensor. */
-    std::unique_ptr<TensorImpl> mImpl; /** Pointer to the actual data implementation. */
+    std::vector<DimSize_t> mStrides; /** Stride dimensions of the tensor. */
+    std::shared_ptr<TensorImpl> mImpl; /** Pointer to the actual data implementation. */
+    std::size_t mImplOffset = 0;
     std::shared_ptr<Tensor> mGrad; /** Pointer to the associated gradient Tensor instance. */
 
     // Cached data
     std::size_t mSize = 0;    /** Number of elements in the Tensor. */
+    bool mContiguous = true;
 
    public:
     static constexpr const char *Type = "Tensor";
@@ -57,21 +60,29 @@ class Tensor : public Data,
     }
 
     /**
-     * @brief Construct a new Tensor object copied from another one.
+     * @brief Construct a new Tensor object from another one (shallow copy).
+     * Data memory is not copied, but shared between the new Tensor and the
+     * initial one.
+     * 
      * @param otherTensor
      */
-    Tensor(const Tensor& otherTensor)
-        : Data(Type),
-          mDataType(otherTensor.mDataType),
-          mDims(otherTensor.mDims),
-          mSize(otherTensor.mSize)
-    {
-        if (otherTensor.hasImpl()) {
-            mImpl = Registrar<Tensor>::create({otherTensor.mImpl->backend(), dataType()})(*this);
-            mImpl->setDevice(otherTensor.mImpl->device().second);
-            // Same backend, same device => directly use copy()
-            mImpl->copy(otherTensor.mImpl->rawPtr(), mSize);
+    Tensor(const Tensor&) = default;
+    Tensor(Tensor&&) = default;
+
+    /**
+     * Perform a deep copy of the tensor.
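+     *
+     * Illustrative usage (a sketch, assuming the "cpu" backend is registered):
+     * @code
+     * Tensor t = Array1D<int, 3>{{1, 2, 3}};
+     * Tensor view = t;          // shallow copy: shares t's storage
+     * Tensor copy = t.clone();  // deep copy: owns its own storage
+     * @endcode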
+    */
+    Tensor clone() const {
+        Tensor newTensor(*this);
+        if (!newTensor.isContiguous()) {
+            newTensor.makeContiguous();
+        }
+        else {
+            std::shared_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({mImpl->backend(), mDataType})(mImpl->device().second, mSize);
+            newImpl->copy(mImpl->rawPtr(mImplOffset), mSize);
+            newTensor.setImpl(newImpl);
         }
+        return newTensor;
     }
 
     /**
@@ -84,7 +95,8 @@ class Tensor : public Data,
         : Data(Type),
           mDataType(NativeType<T>::type),
           mDims({SIZE_0}),
-          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this)),
+          mStrides({1}),
+          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0)),
           mSize(SIZE_0) {
         mImpl->copyFromHost(&arr.data[0], SIZE_0);
     }
@@ -93,9 +105,9 @@ class Tensor : public Data,
     constexpr Tensor &operator=(Array1D<T, SIZE_0> &&arr) {
         resize({SIZE_0});
         if (!mImpl) {
-            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this);
+            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0);
         }
-        mImpl->copyFromHost(&arr.data[0], SIZE_0);
+        mImpl->copyFromHost(&arr.data[0], SIZE_0, mImplOffset);
         return *this;
     }
 
@@ -110,7 +122,8 @@ class Tensor : public Data,
         : Data(Type),
           mDataType(NativeType<T>::type),
           mDims({SIZE_0, SIZE_1}),
-          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this)),
+          mStrides({SIZE_1, 1}),
+          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0 * SIZE_1)),
           mSize(SIZE_0 * SIZE_1) {
         mImpl->copyFromHost(&arr.data[0][0], SIZE_0 * SIZE_1);
     }
@@ -119,9 +132,9 @@ class Tensor : public Data,
     constexpr Tensor &operator=(Array2D<T, SIZE_0, SIZE_1> &&arr) {
         resize({SIZE_0, SIZE_1});
         if (!mImpl) {
-            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this);
+            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0 * SIZE_1);
         }
-        mImpl->copyFromHost(&arr.data[0][0], SIZE_0 * SIZE_1);
+        mImpl->copyFromHost(&arr.data[0][0], SIZE_0 * SIZE_1, mImplOffset);
         return *this;
     }
 
@@ -137,7 +150,8 @@ class Tensor : public Data,
         : Data(Type),
           mDataType(NativeType<T>::type),
           mDims({SIZE_0, SIZE_1, SIZE_2}),
-          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this)),
+          mStrides({SIZE_1 * SIZE_2, SIZE_2, 1}),
+          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0 * SIZE_1 * SIZE_2)),
           mSize(SIZE_0 * SIZE_1 * SIZE_2) {
         mImpl->copyFromHost(&arr.data[0][0][0], SIZE_0 * SIZE_1 * SIZE_2);
     }
@@ -146,9 +160,9 @@ class Tensor : public Data,
     constexpr Tensor &operator=(Array3D<T, SIZE_0, SIZE_1, SIZE_2> &&arr) {
         resize({SIZE_0, SIZE_1, SIZE_2});
         if (!mImpl) {
-            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this);
+            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0 * SIZE_1 * SIZE_2);
         }
-        mImpl->copyFromHost(&arr.data[0][0][0], SIZE_0 * SIZE_1 * SIZE_2);
+        mImpl->copyFromHost(&arr.data[0][0][0], SIZE_0 * SIZE_1 * SIZE_2, mImplOffset);
         return *this;
     }
 
@@ -165,7 +179,8 @@ class Tensor : public Data,
         : Data(Type),
           mDataType(NativeType<T>::type),
           mDims({SIZE_0, SIZE_1, SIZE_2, SIZE_3}),
-          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this)),
+          mStrides({SIZE_1 * SIZE_2 * SIZE_3, SIZE_2 * SIZE_3, SIZE_3, 1}),
+          mImpl(Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0 * SIZE_1 * SIZE_2 * SIZE_3)),
           mSize(SIZE_0 * SIZE_1 * SIZE_2 * SIZE_3) {
         mImpl->copyFromHost(&arr.data[0][0][0][0], SIZE_0 * SIZE_1 * SIZE_2 * SIZE_3);
     }
@@ -174,33 +189,35 @@ class Tensor : public Data,
     constexpr Tensor &operator=(Array4D<T, SIZE_0, SIZE_1, SIZE_2, SIZE_3> &&arr) {
         resize({SIZE_0, SIZE_1, SIZE_2, SIZE_3});
         if (!mImpl) {
-            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(*this);
+            mImpl = Registrar<Tensor>::create({"cpu", NativeType<T>::type})(0, SIZE_0 * SIZE_1 * SIZE_2 * SIZE_3);
         }
-        mImpl->copyFromHost(&arr.data[0][0][0][0], SIZE_0 * SIZE_1 * SIZE_2 * SIZE_3);
+        mImpl->copyFromHost(&arr.data[0][0][0][0], SIZE_0 * SIZE_1 * SIZE_2 * SIZE_3, mImplOffset);
         return *this;
     }
 
     /**
-     * @brief Copy dimensions, datatype and data of another Tensor.
+     * @brief Copy dimensions, datatype and data from another Tensor.
+     * If the current Tensor already has an implementation, data is copied to the
+     * existing implementation. The Tensor's backend/device remain untouched.
+     * If the current Tensor does not have an implementation, only a shallow copy
+     * is performed and the Tensor will share data with t.
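+     *
+     * Illustrative usage (a sketch, assuming the "cpu" backend is registered):
+     * @code
+     * Tensor a = Array1D<int, 3>{{1, 2, 3}};
+     * Tensor b;   // no implementation yet
+     * b = a;      // shallow copy: b shares a's storage
+     * Tensor c = Array1D<int, 3>{{0, 0, 0}};
+     * c = a;      // c already has an implementation: data is copied into it
+     * @endcode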
      * @param t other Tensor object.
      * @return Tensor&
      */
     Tensor &operator=(const Tensor &t) {
-        resize(t.dims());
-        setDataType(t.dataType());
+        resize(t.dims(), t.strides());
+        setDataType(t.dataType(), false); // do not convert existing data
         if (t.hasImpl()) {
             if (hasImpl()) {
-                copyCastFrom(t);
+                copyFrom(t);
             }
             else {
-                mImpl = Registrar<Tensor>::create({t.mImpl->backend(), dataType()})(*this);
-                mImpl->setDevice(t.mImpl->device().second);
-                // Same backend, same device => directly use copy()
-                mImpl->copy(t.mImpl->rawPtr(), mSize);
+                // Perform a shallow copy only
+                setImpl(t.mImpl, t.mImplOffset);
             }
         }
         else {
-            mImpl = nullptr;
+            setImpl(nullptr);
         }
         return *this;
     }
@@ -233,17 +250,15 @@ class Tensor : public Data,
             if (mImpl->device() != std::make_pair(name, device)) {
                 // Backend change: create new impl, copy from old to new and replace
                 // impl
-                std::unique_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({name, mDataType})(*this);
-                newImpl->setDevice(device);
+                std::shared_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({name, mDataType})(device, mImpl->size());
                 if (copyFrom) {
-                    newImpl->copyFrom(*mImpl, size());
+                    newImpl->copyFrom(*mImpl, mImpl->size(), mImplOffset, 0);
                 }
-                mImpl = std::move(newImpl);
+                setImpl(newImpl);
             }
         }
         else {
-            mImpl = Registrar<Tensor>::create({name, mDataType})(*this);
-            mImpl->setDevice(device);
+            mImpl = Registrar<Tensor>::create({name, mDataType})(device, mSize);
         }
     }
 
@@ -273,21 +288,32 @@ class Tensor : public Data,
      */
     void setDataType(const DataType dt, bool copyCast = true) {
         if (mImpl && (dataType() != dt)) {
-            std::unique_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({mImpl->backend(), dt})(*this);
+            std::shared_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({mImpl->backend(), dt})(mImpl->device().second, mImpl->size());
             if (copyCast) {
-                newImpl->copyCast(mImpl->rawPtr(), size(), mDataType);
+                newImpl->copyCast(mImpl->rawPtr(mImplOffset), mDataType, mImpl->size());
             }
-            mImpl = std::move(newImpl);
+            setImpl(newImpl);
         }
         mDataType = dt;
     }
 
     /**
      * @brief Get the Impl object
-     * @return constexpr const std::unique_ptr<TensorImpl>&
+     * @return constexpr const std::shared_ptr<TensorImpl>&
      */
-    constexpr const std::unique_ptr<TensorImpl> &getImpl() { return mImpl; }
-    constexpr const std::unique_ptr<TensorImpl> &getImpl() const { return mImpl; }
+    constexpr const std::shared_ptr<TensorImpl> &getImpl() const { return mImpl; }
+    constexpr std::size_t getImplOffset() const { return mImplOffset; }
+
+    /**
+     * @brief Set the Impl object
+     * 
+     * @param impl New impl shared pointer
+     * @param implOffset Storage offset in this new impl for this Tensor
+     */
+    void setImpl(std::shared_ptr<TensorImpl> impl, std::size_t implOffset = 0) {
+        mImpl = impl;
+        mImplOffset = implOffset;
+    }
 
     /**
     * @brief Return whether an implementation has been associated.
@@ -319,6 +345,18 @@ class Tensor : public Data,
      */
     constexpr const std::vector<DimSize_t> &dims() const { return mDims; }
 
+    /**
+     * @brief Get strides of the Tensor object.
+     * @return constexpr const std::vector<DimSize_t>&
+     */
+    constexpr const std::vector<DimSize_t> &strides() const { return mStrides; }
+
+    /**
+     * @brief Return true if Tensor is contiguous in memory.
+     * @return bool
+     */
+    constexpr bool isContiguous() const { return mContiguous; }
+
     /**
      * @brief Get the number of elements in the Tensor object.
      * @return constexpr std::size_t
@@ -350,10 +388,49 @@ class Tensor : public Data,
     * one, all previous data is invalidated. Otherwise, previous data may or may
      * not remain valid, depending on the backend implementation.
      * @param dims New dimensions
+     * @param strides Stride of the tensor (if not specified, "nested" stride is used)
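+     *
+     * Example (a sketch; the strides describe the memory layout):
+     * @code
+     * Tensor t(DataType::Float32);
+     * t.resize({2, 3});          // default "nested" strides {3, 1}: contiguous
+     * t.resize({2, 3}, {1, 2});  // custom strides: tensor is no longer contiguous
+     * @endcode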
      */
-    void resize(const std::vector<DimSize_t> &dims) {
-        mDims = dims;
-        computeSize();
+    void resize(const std::vector<DimSize_t> &dims, std::vector<DimSize_t> strides = std::vector<DimSize_t>()) {
+        bool checkContiguous = true;
+        if (strides.empty()) {
+            strides.resize(dims.size());
+            size_t expectedStride = 1;
+            for (int dim = dims.size() - 1; dim >= 0; --dim) {
+                strides[dim] = expectedStride;
+                expectedStride*= dims[dim];
+            }
+            checkContiguous = false;
+        }
+        else {
+            AIDGE_ASSERT(strides.size() == dims.size(), "Number of strides must match number of dims");
+        }
+
+        if (mImpl.use_count() > 1) {
+            // Here we could also create a new storage for this tensor in this case.
+            // But is it more likely that the user really wants this, or that it is a mistake?
+            AIDGE_ASSERT(dims == mDims && strides == mStrides, "Cannot resize Tensor with shared storage");
+        }
+        else {
+            mDims = dims;
+            mStrides = strides;
+
+            mContiguous = true;
+            if (checkContiguous) {
+                size_t expectedStride = 1;
+                for (int dim = dims.size() - 1; dim >= 0; --dim) {
+                    if (strides[dim] != expectedStride) {
+                        mContiguous = false;
+                        break;
+                    }
+                    expectedStride*= dims[dim];
+                }
+            }
+
+            computeSize();
+            if (mImpl) {
+                mImpl->resize(mSize);
+            }
+        }
     }
 
     /**
@@ -367,25 +444,25 @@ class Tensor : public Data,
     const expectedType& get(std::size_t idx) const {
         AIDGE_ASSERT(NativeType<expectedType>::type == mDataType, "wrong data type");
         AIDGE_ASSERT(idx < mSize, "idx out of range");
-        return *reinterpret_cast<expectedType *>(mImpl->hostPtr(idx));
+        return *reinterpret_cast<expectedType *>(mImpl->hostPtr(mImplOffset + idx));
     }
 
     template <typename expectedType>
     const expectedType& get(std::vector<std::size_t> coordIdx) const {
-        return get<expectedType>(getIdx(coordIdx));
+        return get<expectedType>(getStorageIdx(coordIdx));
     }
 
     template <typename expectedType>
     void set(std::size_t idx, expectedType value){
         AIDGE_ASSERT(NativeType<expectedType>::type == mDataType, "wrong data type");
         AIDGE_ASSERT(idx < mSize, "idx out of range");
-        expectedType* dataPtr = static_cast<expectedType*>(mImpl->hostPtr(idx));
+        expectedType* dataPtr = static_cast<expectedType*>(mImpl->hostPtr(mImplOffset + idx));
         *dataPtr = value;
     }
 
     template <typename expectedType>
     void set(std::vector<std::size_t> coordIdx, expectedType value){
-        set<expectedType>(getIdx(coordIdx), value);
+        set<expectedType>(getStorageIdx(coordIdx), value);
     }
 
 
@@ -449,9 +526,9 @@ class Tensor : public Data,
                     for (; dimVals[dim] < static_cast<std::size_t>(dims()[dim]); ++dimVals[dim]) {
                         res += spaceString + "{";
                         for (DimSize_t j = 0; j < dims()[dim + 1] - 1; ++j) {
-                            res += " " + ptrToString(mDataType, mImpl->hostPtr(), counter++) + ",";
+                            res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), counter++) + ",";
                         }
-                        res += " " + ptrToString(mDataType, mImpl->hostPtr(), counter++) + "}";
+                        res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), counter++) + "}";
                         if (dimVals[dim] < static_cast<std::size_t>(dims()[dim] - 1)) {
                             res += ",";
                         }
@@ -471,7 +548,7 @@ class Tensor : public Data,
         } else {
             res += "{";
             for (DimSize_t j = 0; j < dims()[0]; ++j) {
-                res += " " + ptrToString(mDataType, mImpl->hostPtr(), j) + ((j < dims()[0]-1) ? "," : " ");
+                res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), j) + ((j < dims()[0]-1) ? "," : " ");
             }
         }
         res += "}";
@@ -493,6 +570,7 @@ class Tensor : public Data,
 
     /**
     * @brief From the 1D contiguous index, return the coordinate of an element in the tensor.
+     * Beware: do not use this function with the storage index!
      *
      * @param flatIdx 1D contiguous index of the value considering a flatten, contiguous, tensor.
      * @return std::vector<DimSize_t>
@@ -512,6 +590,8 @@ class Tensor : public Data,
      * @brief From the coordinate returns the 1D contiguous index of an element in the tensor.
     * If the number of coordinates is less than the number of dimensions,
      * the remaining coordinates are assumed to be 0.
+     * Beware: the contiguous index will only correspond to the storage index
+     * if the tensor is contiguous!
      *
      * @param coordIdx Coordinate to an element in the tensor
      * @return DimSize_t Contiguous index
@@ -527,6 +607,51 @@ class Tensor : public Data,
         return flatIdx + coordIdx[i];
     }
 
+    /**
+     * @brief From the coordinate returns the 1D storage index of an element in the tensor.
+     * If the number of coordinates is less than the number of dimensions,
+     * the remaining coordinates are assumed to be 0.
+     *
+     * @param coordIdx Coordinate to an element in the tensor
+     * @return DimSize_t Storage index
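+     *
+     * For example (sketch), with dims {2, 2, 3} and the default strides {6, 3, 1},
+     * getStorageIdx({1, 0, 2}) returns 1*6 + 0*3 + 2*1 = 8.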
+     */
+    std::size_t getStorageIdx(const std::vector<std::size_t>& coordIdx) const {
+        AIDGE_ASSERT(coordIdx.size() <= mDims.size(), "Number of coordinates is higher than the number of dimensions");
+        return std::inner_product(coordIdx.begin(), coordIdx.end(), mStrides.begin(), DimSize_t(0));
+    }
+
+    /**
+     * Returns a sub-tensor with one or more fewer dimensions.
+     * For instance, t.extract({1}) on a CHW tensor will return the HW tensor
+     * of channel #1.
+     * Likewise, t.extract({0, 1}) on a NCHW tensor will return the HW tensor
+     * of batch #0 and channel #1.
+     * No memory copy is performed; the returned tensor does not own the memory.
+     * If the number of coordinates matches the number of dimensions, an empty
+     * tensor is returned.
+     * If the current tensor is contiguous, the returned tensor is guaranteed to be
+     * contiguous as well.
+     * 
+     * @param coordIdx Coordinates of the sub-tensor to extract
+     * @return Tensor Sub-tensor.
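+     *
+     * Illustrative usage (a sketch, assuming t is a contiguous CHW tensor of dims {3, 32, 32}):
+     * @code
+     * Tensor hw  = t.extract({1});     // dims {32, 32}, shares t's storage
+     * Tensor row = t.extract({1, 4});  // dims {32}
+     * @endcode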
+    */
+    Tensor extract(const std::vector<std::size_t>& coordIdx) const;
+
+    /**
+     * Returns a sub-tensor at some coordinate and with some dimension.
+     * 
+     * @param coordIdx First coordinates of the sub-tensor to extract
+     * @param dims Dimensions of the sub-tensor to extract
+     * @return Tensor Sub-tensor.
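+     *
+     * Illustrative usage (a sketch, assuming t is a contiguous 4x4 tensor):
+     * @code
+     * Tensor block = t.extract({0, 1}, {2, 2});  // shares storage, may be non-contiguous
+     * @endcode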
+    */
+    Tensor extract(const std::vector<std::size_t>& coordIdx, const std::vector<std::size_t>& dims) const;
+
+    /**
+     * Make the tensor's storage contiguous, if it is not already the case.
+     * If not contiguous, a new memory space is allocated.
+    */
+    void makeContiguous();
+
     /**
      * Copy-cast data from a Tensor on the same device.
      * If current tensor backend/device is set and is different from src, an
@@ -572,6 +697,20 @@ class Tensor : public Data,
         copyCastFrom(src, movedSrc);
     }
 
+    /**
+     * Return a reference to a Tensor that is guaranteed to be contiguous:
+     * - itself, if already contiguous;
+     * - the provided Tensor, overwritten with the copied data.
+     * The data type, backend and device stay the same.
+     * @param fallback A shared_ptr to Tensor ready to be overwritten if necessary.
+     * The shared_ptr does not need to be initialized. No new memory allocation
+     * will occur if fallback has already been allocated with the right 
+     * type/size/device.
+     * @return Reference to either itself or to fallback.
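+     *
+     * Illustrative usage (a sketch, assuming t is a Tensor with a backend already set):
+     * @code
+     * std::shared_ptr<Tensor> fallback;
+     * const Tensor& tContig = t.refContiguous(fallback);  // contiguous view of t
+     * @endcode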
+    */
+    Tensor& refContiguous(std::shared_ptr<Tensor>& fallback);
+    const Tensor& refContiguous(std::shared_ptr<Tensor>& fallback) const;
+
     /**
      * Return a reference to a Tensor casted to the desired data type:
      * - itself, if already at the right data type;
@@ -642,6 +781,43 @@ class Tensor : public Data,
         return refCastFrom(fallback, targetReqs.dataType(), device.first, device.second);
     }
 
+    /**
+     * Return a reference to a Tensor on desired data type and backend/device:
+     * - itself, if already with the right characteristics;
+     * - the provided Tensor, overwritten with the right characteristics.
+     * NOTE: no data is copy-cast. If data was copy-cast to the same fallback by a
+     * previous refCastFrom(), it remains valid; otherwise, the fallback data is invalid.
+     * @param fallback A shared_ptr to Tensor ready to be overwritten if necessary.
+     * The shared_ptr does not need to be initialized. No new memory allocation
+     * will occur if fallback has already been allocated with the right
+     * type/size/device.
+     * @param dt The desired data type.
+     * @param backend The desired backend.
+     * @param device The desired device.
+     * @return Reference to either itself or to fallback.
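+     *
+     * Illustrative usage (a sketch, assuming t has a backend set and "cpu" is registered):
+     * @code
+     * std::shared_ptr<Tensor> fallback;
+     * Tensor& tReq = t.ref(fallback, DataType::Float32, "cpu");  // no data copy
+     * @endcode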
+    */
+    Tensor& ref(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt, const std::string &backend, DeviceIdx_t device = 0);
+    const Tensor& ref(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt, const std::string &backend, DeviceIdx_t device = 0) const;
+
+    /**
+     * Return a reference to a Tensor with same characteristics
+     * (data type, backend/device) as targetReqs Tensor:
+     * - itself, if already with the right characteristics;
+     * - the provided Tensor, overwritten with the right characteristics.
+     * NOTE: no data is copy-cast. If data was copy-cast to the same fallback by a
+     * previous refCastFrom(), it remains valid; otherwise, the fallback data is invalid.
+     * @param fallback A shared_ptr to Tensor ready to be overwritten if necessary.
+     * The shared_ptr does not need to be initialized. No new memory allocation
+     * will occur if fallback has already been allocated with the right
+     * type/size/device.
+     * @param targetReqs Tensor with the desired target characteristics.
+     * @return Reference to either itself or to fallback.
+    */
+    Tensor& ref(std::shared_ptr<Tensor>& fallback, const Tensor& targetReqs) {
+        const auto& device = targetReqs.getImpl()->device();
+        return ref(fallback, targetReqs.dataType(), device.first, device.second);
+    }
+
 private:
     ///\bug not protected against overflow
     void computeSize() {
diff --git a/include/aidge/operator/Gather.hpp b/include/aidge/operator/Gather.hpp
index 20082eed28825ade9d62fb5d4e081840d3bd4442..f6647f99151304d0cf083aed109cc642c9f1ecc2 100644
--- a/include/aidge/operator/Gather.hpp
+++ b/include/aidge/operator/Gather.hpp
@@ -27,25 +27,26 @@
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
-enum class GatherAttr { Axis };
+enum class GatherAttr { Indices, GatheredShape, Axis };
 
 class Gather_Op : public OperatorTensor,
                 public Registrable<Gather_Op,
                                    std::string,
                                    std::unique_ptr<OperatorImpl>(const Gather_Op&)>,
-                public StaticAttributes<GatherAttr, int> {
+                public StaticAttributes<GatherAttr, std::vector<std::int64_t>, std::vector<DimSize_t>, std::int64_t> {
 
 public:
     static const std::string Type;
 
     Gather_Op() = delete;
 
-
-    using Attributes_ = StaticAttributes<GatherAttr, int>;
+    using Attributes_ = StaticAttributes<GatherAttr, std::vector<std::int64_t>, std::vector<DimSize_t>, std::int64_t>;
     template <GatherAttr e> using attr = typename Attributes_::template attr<e>;
-    Gather_Op(int axis)
-            : OperatorTensor(Type, 2, 0, 1),
+    Gather_Op(const std::vector<std::int64_t>& indices, const std::vector<DimSize_t>& gatheredShape, std::int64_t axis)
+            : OperatorTensor(Type, 1, 0, 1),
             Attributes_(
+                attr<GatherAttr::Indices>(indices),
+                attr<GatherAttr::GatheredShape>(gatheredShape),
                 attr<GatherAttr::Axis>(axis))
     {}
 
@@ -76,21 +77,21 @@ public:
     }
 
     static const std::vector<std::string> getInputsName(){
-        return {"data_input", "indexes"};
+        return {"data_input"};
     }
     static const std::vector<std::string> getOutputsName(){
         return {"data_output"};
     }
 };
 
-inline std::shared_ptr<Node> Gather(int axis = 0, const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<Gather_Op>(axis), name);
+inline std::shared_ptr<Node> Gather(const std::vector<std::int64_t>& indices, const std::vector<DimSize_t>& gatheredShape, std::int64_t axis = 0, const std::string& name = "") {
+    return std::make_shared<Node>(std::make_shared<Gather_Op>(indices, gatheredShape, axis), name);
 }
 } // namespace Aidge
 
 namespace {
 template <>
-const char *const EnumStrings<Aidge::GatherAttr>::data[] = {"Axis"};
+const char *const EnumStrings<Aidge::GatherAttr>::data[] = {"Indices", "GatheredShape", "Axis"};
 }
 
 #endif /* AIDGE_CORE_OPERATOR_GATHER_H_ */
diff --git a/include/aidge/operator/Slice.hpp b/include/aidge/operator/Slice.hpp
index 12a7425f3339b7fbc0ae010639aacf23d97b0f5f..4a073bc525640846c28d718d09741a67d499830e 100644
--- a/include/aidge/operator/Slice.hpp
+++ b/include/aidge/operator/Slice.hpp
@@ -29,17 +29,17 @@ enum class SliceAttr { Starts, Ends, Axes };
 class Slice_Op
     : public OperatorTensor,
       public Registrable<Slice_Op, std::string, std::unique_ptr<OperatorImpl>(const Slice_Op &)>,
-      public StaticAttributes<SliceAttr, std::vector<std::int32_t>, std::vector<std::int32_t>, std::vector<std::int32_t>> {
+      public StaticAttributes<SliceAttr, std::vector<std::int64_t>, std::vector<std::int64_t>, std::vector<std::int64_t>> {
 public:
     static const std::string Type;
 
     Slice_Op() = delete;
 
-    using Attributes_ = StaticAttributes<SliceAttr, std::vector<std::int32_t>, std::vector<std::int32_t>, std::vector<std::int32_t>>;
+    using Attributes_ = StaticAttributes<SliceAttr, std::vector<std::int64_t>, std::vector<std::int64_t>, std::vector<std::int64_t>>;
     template <SliceAttr e>
     using attr = typename Attributes_::template attr<e>;
 
-    Slice_Op(const std::vector<std::int32_t>& starts, const std::vector<std::int32_t>&  ends, const std::vector<std::int32_t>& axes)
+    Slice_Op(const std::vector<std::int64_t>& starts, const std::vector<std::int64_t>&  ends, const std::vector<std::int64_t>& axes)
         : OperatorTensor(Type, 1, 0, 1),
           Attributes_(attr<SliceAttr::Starts>(starts),
                       attr<SliceAttr::Ends>(ends),
@@ -94,9 +94,9 @@ public:
  * @param name Name of the Operator.
  * @return std::shared_ptr<Node> A Node containing the Operator.
  */
-inline std::shared_ptr<Node> Slice(const std::vector<std::int32_t> starts,
-                                   const std::vector<std::int32_t> ends,
-                                   const std::vector<std::int32_t> axes,
+inline std::shared_ptr<Node> Slice(const std::vector<std::int64_t> starts,
+                                   const std::vector<std::int64_t> ends,
+                                   const std::vector<std::int64_t> axes,
                                    const std::string &name = "") {
     // FIXME: properly handle default w&b initialization in every cases
     return std::make_shared<Node>(std::make_shared<Slice_Op>(starts, ends, axes), name);
diff --git a/python_binding/data/pybind_Tensor.cpp b/python_binding/data/pybind_Tensor.cpp
index 6d6f20ebe9377ce177d936c00f097fee76954bd9..9fbf08d0b782b6f39b2bef3d0b3ab918f6789ac0 100644
--- a/python_binding/data/pybind_Tensor.cpp
+++ b/python_binding/data/pybind_Tensor.cpp
@@ -30,7 +30,7 @@ void addCtor(py::class_<Tensor,
                         Data,
                         Registrable<Tensor,
                                     std::tuple<std::string, DataType>,
-                                    std::unique_ptr<TensorImpl>(const Tensor&)>>& mTensor){
+                                    std::shared_ptr<TensorImpl>(DeviceIdx_t device, NbElts_t length)>>& mTensor){
     mTensor.def(py::init([](
         py::array_t<T, py::array::c_style | py::array::forcecast> b,
         std::string backend = "cpu") {
@@ -60,16 +60,16 @@ void addCtor(py::class_<Tensor,
 void init_Tensor(py::module& m){
     py::class_<Registrable<Tensor,
                            std::tuple<std::string, DataType>,
-                           std::unique_ptr<TensorImpl>(const Tensor&)>,
+                           std::shared_ptr<TensorImpl>(DeviceIdx_t device, NbElts_t length)>,
                std::shared_ptr<Registrable<Tensor,
                                            std::tuple<std::string, DataType>,
-                                           std::unique_ptr<TensorImpl>(const Tensor&)>>>(m,"TensorRegistrable");
+                                           std::shared_ptr<TensorImpl>(DeviceIdx_t device, NbElts_t length)>>>(m,"TensorRegistrable");
 
     py::class_<Tensor, std::shared_ptr<Tensor>,
                Data,
                Registrable<Tensor,
                            std::tuple<std::string, DataType>,
-                           std::unique_ptr<TensorImpl>(const Tensor&)>> pyClassTensor
+                           std::shared_ptr<TensorImpl>(DeviceIdx_t device, NbElts_t length)>> pyClassTensor
         (m,"Tensor", py::multiple_inheritance(), py::buffer_protocol());
 
     pyClassTensor.def(py::init<>())
@@ -78,7 +78,7 @@ void init_Tensor(py::module& m){
     .def("dims", (const std::vector<DimSize_t>& (Tensor::*)()const) &Tensor::dims)
     .def("dtype", &Tensor::dataType)
     .def("size", &Tensor::size)
-    .def("resize", (void (Tensor::*)(const std::vector<DimSize_t>&)) &Tensor::resize)
+    .def("resize", (void (Tensor::*)(const std::vector<DimSize_t>&, std::vector<DimSize_t>)) &Tensor::resize)
     .def("has_impl", &Tensor::hasImpl)
     .def("get_coord", &Tensor::getCoord)
     .def("get_idx", &Tensor::getIdx)
@@ -120,7 +120,7 @@ void init_Tensor(py::module& m){
         }
     })
     .def_buffer([](Tensor& b) -> py::buffer_info {
-        const std::unique_ptr<TensorImpl>& tensorImpl = b.getImpl();
+        const std::shared_ptr<TensorImpl>& tensorImpl = b.getImpl();
 
         std::vector<size_t> dims;
         std::vector<size_t> strides;
diff --git a/python_binding/operator/pybind_Gather.cpp b/python_binding/operator/pybind_Gather.cpp
index 92a93c3005fca5ff08ab255d2ded530424883a0d..f0d55e2f40bd89269c96564cea6b5a002b477b8b 100644
--- a/python_binding/operator/pybind_Gather.cpp
+++ b/python_binding/operator/pybind_Gather.cpp
@@ -24,6 +24,6 @@ void init_Gather(py::module& m) {
     .def("get_outputs_name", &Gather_Op::getOutputsName)
     .def("attributes_name", &Gather_Op::staticGetAttrsName);
 
-    m.def("Gather", &Gather, py::arg("axis"), py::arg("name") = "");
+    m.def("Gather", &Gather, py::arg("indices"), py::arg("gathered_shape"), py::arg("axis"), py::arg("name") = "");
 }
 }  // namespace Aidge
diff --git a/src/backend/TensorImpl.cpp b/src/backend/TensorImpl.cpp
index 3982ee1fed9c9198b539bf9a28edd461992b791f..ee2f82a9cf847bfc6fe51e8d8b621e53a4c93cf4 100644
--- a/src/backend/TensorImpl.cpp
+++ b/src/backend/TensorImpl.cpp
@@ -14,23 +14,23 @@
 #include "aidge/utils/Types.h"
 #include "aidge/utils/ErrorHandling.hpp"
 
-void Aidge::TensorImpl::copyFrom(const TensorImpl& srcImpl, NbElts_t length) {
-    if (&srcImpl == this) {
+void Aidge::TensorImpl::copyFrom(const TensorImpl& srcImpl, NbElts_t length, NbElts_t srcOffset, NbElts_t dstOffset) {
+    if (&srcImpl == this && srcOffset == dstOffset) {
         return;
     }
 
     if (srcImpl.device() != device()) {
         if (srcImpl.backend() == backend()) {
             // Same backend, but different device
-            copyFromDevice(srcImpl.rawPtr(), length, srcImpl.device());
+            copyFromDevice(srcImpl.rawPtr(srcOffset), srcImpl.device(), length, dstOffset);
         }
         else if (srcImpl.hostPtr() != nullptr) {
             // Different backend, but input is valid on host
-            copyFromHost(srcImpl.hostPtr(), length);
+            copyFromHost(srcImpl.hostPtr(srcOffset), length, dstOffset);
         }
         else if (hostPtr() != nullptr) {
             // Different backend, but dst is valid on host
-            srcImpl.copyToHost(hostPtr(), length);
+            srcImpl.copyToHost(hostPtr(dstOffset), length, srcOffset);
         }
         else {
             // No direct link possible from src to dst device
@@ -40,12 +40,12 @@ void Aidge::TensorImpl::copyFrom(const TensorImpl& srcImpl, NbElts_t length) {
             // - There is currently no concrete use case
             // - Just providing a pointer would be unsafe (risk of buffer overflow...)
             auto tmpHostBuffer = std::unique_ptr<char[]>(new char[scalarSize() * length]);
-            srcImpl.copyToHost(tmpHostBuffer.get(), length);
-            copyFromHost(tmpHostBuffer.get(), length);
+            srcImpl.copyToHost(tmpHostBuffer.get(), length, srcOffset);
+            copyFromHost(tmpHostBuffer.get(), length, dstOffset);
         }
     }
     else {
         // Same device: simple copy on device
-        copy(srcImpl.rawPtr(), length);
+        copy(srcImpl.rawPtr(srcOffset), length, dstOffset);
     }
 }
diff --git a/src/data/Tensor.cpp b/src/data/Tensor.cpp
index da0c626d78dd1cc4452bfc07bf6c6a7f58b8d1e4..d45dee5639a6bc082871e1110657392fb97c15ec 100644
--- a/src/data/Tensor.cpp
+++ b/src/data/Tensor.cpp
@@ -13,11 +13,72 @@
 #include "aidge/utils/Types.h"
 #include "aidge/utils/ErrorHandling.hpp"
 
+Aidge::Tensor Aidge::Tensor::extract(const std::vector<std::size_t>& coordIdx) const {
+    AIDGE_ASSERT(isContiguous(), "Tensor must be contiguous");
+    AIDGE_ASSERT(coordIdx.size() <= mDims.size(), "Number of coordinates is higher than number of dimensions");
+
+    Tensor subTensor(mDataType);
+    subTensor.resize(std::vector<size_t>(mDims.begin() + coordIdx.size(), mDims.end()),
+        std::vector<size_t>(mStrides.begin() + coordIdx.size(), mStrides.end()));
+    subTensor.setBackend(mImpl->backend(), mImpl->device().second);
+    subTensor.setImpl(mImpl, mImplOffset + getStorageIdx(coordIdx));
+    return subTensor;
+}
+
+Aidge::Tensor Aidge::Tensor::extract(const std::vector<std::size_t>& coordIdx, const std::vector<std::size_t>& dims) const {
+    AIDGE_ASSERT(isContiguous(), "Tensor must be contiguous");
+    AIDGE_ASSERT(coordIdx.size() == mDims.size(), "Number of coordinates must match the number of dimensions");
+
+    Tensor subTensor(mDataType);
+    subTensor.resize(dims, mStrides);
+    subTensor.setBackend(mImpl->backend(), mImpl->device().second);
+    subTensor.setImpl(mImpl, mImplOffset + getStorageIdx(coordIdx));
+    return subTensor;
+}
+
+void Aidge::Tensor::makeContiguous() {
+    if (!mImpl || isContiguous()) {
+        return;
+    }
+
+    // Block so that mImpl ref count is 1 for resize()
+    {
+        // Create a new storage that will be contiguous
+        std::shared_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({mImpl->backend(), mDataType})(mImpl->device().second, mSize);
+        // Copy elements from old to new storage
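+        // e.g. for a 2x2 tensor with strides {1, 2} (a transposed view), each chunk has
+        // size 1; larger contiguous chunks are copied with a single copy() call below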
+        size_t idx = 0;
+        while (idx < mSize) {
+            const size_t storageIdx = getStorageIdx(getCoord(idx));
+
+            // Determine the size of the contiguous chunk
+            size_t copySize = 1;
+            while (idx + copySize < mSize && 
+                getStorageIdx(getCoord(idx + copySize)) == storageIdx + copySize)
+            {
+                ++copySize;
+            }
+
+            // Perform a single copy for the contiguous chunk
+            newImpl->copy(mImpl->rawPtr(mImplOffset + storageIdx), copySize, idx);
+
+            // Move to the next index after the contiguous chunk
+            idx += copySize;
+        }
+        // Replace old storage by new, contiguous, storage
+        setImpl(newImpl);
+    }
+
+    // Resize tensor without strides => tensor is now contiguous
+    resize(mDims);
+}
+
 void Aidge::Tensor::copyCast(const Tensor& src) {
     if (&src == this) {
         return;
     }
 
+    AIDGE_ASSERT(src.isContiguous(), "cannot copy-cast non-contiguous tensor");
+
     // Current Tensor has necessarily a data type, but may not have backend
     if (!getImpl()) {
         // If no backend was set for the current tensor, use the same as src
@@ -27,7 +88,7 @@ void Aidge::Tensor::copyCast(const Tensor& src) {
     resize(src.dims());
 
     AIDGE_ASSERT(src.getImpl()->device() == getImpl()->device(), "cannot copy-cast from a different backend/device");
-    getImpl()->copyCast(src.getImpl()->rawPtr(), src.size(), src.dataType());
+    getImpl()->copyCast(src.getImpl()->rawPtr(src.mImplOffset), src.dataType(), src.size(), mImplOffset);
 }
 
 void Aidge::Tensor::copyFrom(const Tensor& src) {
@@ -35,6 +96,8 @@ void Aidge::Tensor::copyFrom(const Tensor& src) {
         return;
     }
 
+    AIDGE_ASSERT(src.isContiguous(), "cannot copy from non-contiguous tensor");
+
     // Current Tensor has necessarily a data type, but may not have backend
     if (!getImpl()) {
         // If no backend was set for the current tensor, use the same as src
@@ -44,7 +107,7 @@ void Aidge::Tensor::copyFrom(const Tensor& src) {
     resize(src.dims());
 
     AIDGE_ASSERT(src.dataType() == dataType(), "cannot copy from a different data type");
-    getImpl()->copyFrom(*(src.getImpl()), src.size());
+    getImpl()->copyFrom(*(src.getImpl()), src.size(), src.mImplOffset, mImplOffset);
 }
 
 void Aidge::Tensor::copyCastFrom(const Tensor& src, std::shared_ptr<Tensor>& movedSrcPtr) {
@@ -52,6 +115,8 @@ void Aidge::Tensor::copyCastFrom(const Tensor& src, std::shared_ptr<Tensor>& mov
         return;
     }
 
+    AIDGE_ASSERT(src.isContiguous(), "cannot copy-cast from non-contiguous tensor");
+
     // Current Tensor has necessarily a data type, but may not have backend
     if (!getImpl()) {
         // If no backend was set for the current tensor, use the same as src
@@ -65,12 +130,35 @@ void Aidge::Tensor::copyCastFrom(const Tensor& src, std::shared_ptr<Tensor>& mov
         const auto device = getImpl()->device();
         const Tensor& movedSrc = src.refFrom(movedSrcPtr, device.first, device.second);
         // Second, copy-cast data (necessary)
-        getImpl()->copyCast(movedSrc.getImpl()->rawPtr(), movedSrc.size(), movedSrc.dataType());
+        getImpl()->copyCast(movedSrc.getImpl()->rawPtr(movedSrc.mImplOffset), movedSrc.dataType(), movedSrc.size(), mImplOffset);
     }
     else {
         // Directly copy, no conversion necessary
         // Avoid making a double copy if both data type and device are the same
-        getImpl()->copyFrom(*(src.getImpl()), src.size());
+        getImpl()->copyFrom(*(src.getImpl()), src.size(), src.mImplOffset, mImplOffset);
+    }
+}
+
+Aidge::Tensor& Aidge::Tensor::refContiguous(std::shared_ptr<Tensor>& fallback) {
+    // Scott Meyers' solution to avoid code duplication
+    return const_cast<Tensor&>(static_cast<const Tensor&>(*this).refContiguous(fallback));
+}
+
+const Aidge::Tensor& Aidge::Tensor::refContiguous(std::shared_ptr<Tensor>& fallback) const {
+    AIDGE_ASSERT(getImpl(), "no backend was set for tensor, cannot refCast() it");
+
+    if (isContiguous()) {
+        return *this;
+    }
+    else {
+        if (this != fallback.get()) {
+            // Shallow copy to fallback
+            *fallback = *this;
+        }
+
+        // Make fallback contiguous
+        fallback->makeContiguous();
+        return *fallback;
     }
 }
 
@@ -91,6 +179,8 @@ const Aidge::Tensor& Aidge::Tensor::refCast(std::shared_ptr<Tensor>& fallback, c
             fallback->setDataType(dt);
         }
         else {
+            AIDGE_ASSERT(isContiguous(), "cannot refCast non-contiguous tensor");
+
             if (!fallback) {
                 fallback = std::make_shared<Tensor>(dt);
             }
@@ -101,7 +191,7 @@ const Aidge::Tensor& Aidge::Tensor::refCast(std::shared_ptr<Tensor>& fallback, c
             const auto device = getImpl()->device();
             fallback->setBackend(device.first, device.second, false); // don't keep previous data (no copy)
             fallback->resize(dims());
-            fallback->getImpl()->copyCast(getImpl()->rawPtr(), size(), dataType());
+            fallback->getImpl()->copyCast(getImpl()->rawPtr(mImplOffset), dataType(), size(), fallback->mImplOffset);
         }
         return *fallback;
     }
@@ -124,6 +214,8 @@ const Aidge::Tensor& Aidge::Tensor::refFrom(std::shared_ptr<Tensor>& fallback, c
             fallback->setBackend(backend, device);
         }
         else {
+            AIDGE_ASSERT(isContiguous(), "cannot refFrom non-contiguous tensor");
+
             if (!fallback) {
                 fallback = std::make_shared<Tensor>(dataType());
             }
@@ -133,8 +225,34 @@ const Aidge::Tensor& Aidge::Tensor::refFrom(std::shared_ptr<Tensor>& fallback, c
 
             fallback->setBackend(backend, device, false); // don't keep previous data (no copy)
             fallback->resize(dims());
-            fallback->getImpl()->copyFrom(*getImpl(), size());
+            fallback->getImpl()->copyFrom(*getImpl(), size(), mImplOffset, fallback->mImplOffset);
+        }
+        return *fallback;
+    }
+}
+
+Aidge::Tensor& Aidge::Tensor::ref(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt, const std::string &backend, DeviceIdx_t device) {
+    // Scott Meyers' solution to avoid code duplication
+    return const_cast<Tensor&>(static_cast<const Tensor&>(*this).ref(fallback, dt, backend, device));
+}
+
+const Aidge::Tensor& Aidge::Tensor::ref(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt, const std::string &backend, DeviceIdx_t device) const {
+    AIDGE_ASSERT(getImpl(), "no backend was set for tensor, cannot ref() it");
+
+    if (dt == dataType() && std::make_pair(backend, device) == getImpl()->device()) {
+        return *this;
+    }
+    else {
+        // Change fallback type, backend & device, without any data copy
+        if (!fallback) {
+            fallback = std::make_shared<Tensor>(dt);
         }
+        else {
+            fallback->setDataType(dt, false); // don't keep previous data (no copy)
+        }
+
+        fallback->setBackend(backend, device, false); // don't keep previous data (no copy)
+        fallback->resize(dims());
         return *fallback;
     }
 }
diff --git a/src/operator/Gather.cpp b/src/operator/Gather.cpp
index 30804994b6084a5a5558f106a38a6087e54471bc..b5f9d738a0280b3bacdb2ce201c8303b2b4d0a1f 100644
--- a/src/operator/Gather.cpp
+++ b/src/operator/Gather.cpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#include <cassert>
 #include <cstddef>
+#include <cstdint>
 #include <string>
 #include <vector>
 
@@ -22,18 +22,26 @@ const std::string Aidge::Gather_Op::Type = "Gather";
 
 void Aidge::Gather_Op::computeOutputDims() {
     // check inputs have been associated
-    if (!getInput(0) || !getInput(1)) {
-        AIDGE_THROW_OR_ABORT(std::runtime_error, "At least one input was not connected");
+    if (!getInput(0)) {
+        AIDGE_THROW_OR_ABORT(std::runtime_error, "Input was not connected");
     }
 
-    if (getInput(1)->nbDims()!=2){
-        AIDGE_THROW_OR_ABORT(std::runtime_error, "Indices input must be a 2D Tensor");
-    }
+    if (!getInput(0)->empty()) {
+        std::vector<DimSize_t> outDims = getInput(0)->dims();
+        const std::vector<DimSize_t> gatheredShape = this->template getAttr<GatherAttr::GatheredShape>();
+        // TODO: check indices and gatheredShape
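+        // e.g. input dims {2, 3, 4} with Axis = 1 and GatheredShape = {5, 6}
+        // yield output dims {2, 5, 6, 4}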
+
+        const std::int64_t axisIdx = this->template getAttr<GatherAttr::Axis>() >= 0 ?
+                                        this->template getAttr<GatherAttr::Axis>() :
+                                        this->template getAttr<GatherAttr::Axis>() + outDims.size();
+        outDims.erase(outDims.begin() + static_cast<std::size_t>(axisIdx));
+        if (!gatheredShape.empty())
+        {
+            outDims.insert(outDims.cbegin() + static_cast<std::size_t>(axisIdx),
+                            gatheredShape.cbegin(),
+                            gatheredShape.cend());
+        }
 
-    std::vector<DimSize_t> outDims = getInput(0)->dims();
-    std::vector<DimSize_t> indexesDims = getInput(1)->dims();
-    int axisIdx = this->template getAttr<GatherAttr::Axis>()>=0?this->template getAttr<GatherAttr::Axis>():this->template getAttr<GatherAttr::Axis>()+outDims.size();
-    outDims.erase(outDims.begin() + static_cast<std::size_t>(axisIdx));
-    outDims.insert(outDims.begin() + static_cast<std::size_t>(axisIdx), indexesDims.begin(),indexesDims.end());
-    mOutputs[0]->resize(outDims);
+        mOutputs[0]->resize(outDims);
+    }
 }
\ No newline at end of file
diff --git a/src/operator/Reshape.cpp b/src/operator/Reshape.cpp
index b0eea3c1f9f7054021b631c85e0f80e7f8845da6..30b060cd2a58d7995a7447bd9b85b9bc0026a7f7 100644
--- a/src/operator/Reshape.cpp
+++ b/src/operator/Reshape.cpp
@@ -9,39 +9,50 @@
  *
  ********************************************************************************/
 
-#include <cstddef>
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int64_t
+#include <stdexcept>  // std::runtime_error
 #include <string>
 #include <vector>
 
 #include "aidge/operator/Reshape.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Types.h"
 
 const std::string Aidge::Reshape_Op::Type = "Reshape";
 
 void Aidge::Reshape_Op::computeOutputDims() {
-    // check inputs have been associated
+    // check input has been associated
     if (!getInput(0)) {
         AIDGE_THROW_OR_ABORT(std::runtime_error, "Input was not connected");
     }
 
-    DimSize_t nbOutDims = this->template getAttr<ReshapeAttr::Shape>().size();
-    std::vector<DimSize_t> outDims;
-    std::size_t outSize = 1;
-    for(std::size_t i=0; i<nbOutDims; ++i)
-    {
-        int dimSize = this->template getAttr<ReshapeAttr::Shape>()[i];
-        if (dimSize < 1)
+    if (!getInput(0)->empty()) {
+        std::vector<DimSize_t> outDims;
+        // variables to handle a negative dimension
+        bool foundNegativeDimension = false;
+        std::size_t outSize = 1;
+        DimIdx_t negativeIndex = 0;
+
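+        // e.g. an input of 24 elements with Shape = {2, -1, 4} yields outDims = {2, 3, 4}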
+        for(std::size_t i = 0; i < this->template getAttr<ReshapeAttr::Shape>().size(); ++i)
         {
-            AIDGE_THROW_OR_ABORT(std::runtime_error, "bad dimension value");
+            std::int64_t dimSize = this->template getAttr<ReshapeAttr::Shape>()[i];
+            if (dimSize < 0) {
+                if (foundNegativeDimension) {
+                    AIDGE_THROW_OR_ABORT(std::runtime_error, "Found more than one negative dimension in Reshape Operator.");
+                }
+                foundNegativeDimension = true;
+                dimSize = 1;
+                negativeIndex = static_cast<DimIdx_t>(i);
+            }
+            outDims.push_back(static_cast<DimSize_t>(dimSize));
+            outSize *= static_cast<DimSize_t>(dimSize);
         }
-        outDims.push_back(dimSize);
-        outSize *= dimSize;
-    }
 
-    if (getInput(0)->size() != outSize){
-        AIDGE_THROW_OR_ABORT(std::runtime_error, "Output shape must give the same size as input");
-    }
+        if (foundNegativeDimension) {
+            outDims[negativeIndex] = (getInput(0) -> size()) / outSize;
+        }
 
-    mOutputs[0]->resize(outDims);
+        mOutputs[0]->resize(outDims);
+    }
 }
\ No newline at end of file
diff --git a/src/operator/Slice.cpp b/src/operator/Slice.cpp
index 139e84b561a48c2f6a5ecd14ed9d6905d66dec20..11d91a1fcd4c1d4ee6bcc5f9d830870fa6e732e5 100644
--- a/src/operator/Slice.cpp
+++ b/src/operator/Slice.cpp
@@ -30,21 +30,23 @@ void Aidge::Slice_Op::computeOutputDims() {
         AIDGE_THROW_OR_ABORT(std::runtime_error, "Every input should be associated with a Tensor");
     }
 
-    DimSize_t nbAxes = this->template getAttr<SliceAttr::Axes>().size();
+    const DimSize_t nbAxes = this->template getAttr<SliceAttr::Axes>().size();
     std::vector<DimSize_t> outDims = getInput(0)->dims();
     for (std::size_t i = 0; i < nbAxes; ++i) {
         // For each slice operation get the params and cast them to size_t
         const std::int64_t axis_ = this->template getAttr<SliceAttr::Axes>()[i];
         const std::int64_t start_ = this->template getAttr<SliceAttr::Starts>()[i];
         const std::int64_t end_ = this->template getAttr<SliceAttr::Ends>()[i];
-        const std::size_t axis = axis_ >= 0 ? static_cast<std::size_t>(axis_) : axis_ + getInput(0)->nbDims();
-        const std::size_t start = start_ >= 0 ? static_cast<std::size_t>(start_) : start_ + getInput(0)->dims()[axis];
-        const std::size_t end = end_ >= 0 ? static_cast<std::size_t>(end_) : end_ + getInput(0)->dims()[axis];
+        const std::size_t axis = axis_ >= 0 ? static_cast<std::size_t>(axis_) : static_cast<std::size_t>(axis_) + getInput(0)->nbDims();
+        const std::size_t start = start_ >= 0 ? static_cast<std::size_t>(start_) : static_cast<std::size_t>(start_) + getInput(0)->dims()[axis];
+        const std::size_t end = end_ >= 0 ? static_cast<std::size_t>(end_) : static_cast<std::size_t>(end_) + getInput(0)->dims()[axis];
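+        // e.g. for an input dim of size 10, start_ = -3 and end_ = -1 give start = 7 and end = 9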
 
         const std::size_t sliceLength = end - start + 1;
         // Check if slice length is valid
         if (sliceLength > getInput(0)->dims()[axis])
+        {
             AIDGE_THROW_OR_ABORT(std::runtime_error, "ROI of Slice operator out of bounds");
+        }
         outDims[axis] = sliceLength;
     }
     mOutputs[0]->resize(outDims);
diff --git a/src/recipies/HorizontalTiling.cpp b/src/recipies/HorizontalTiling.cpp
index 6cc34eba076934b884b336ce40081a855d917182..7d3fafc0a15d1b797fdfb1a2884b62d2d8d766c5 100644
--- a/src/recipies/HorizontalTiling.cpp
+++ b/src/recipies/HorizontalTiling.cpp
@@ -82,16 +82,16 @@ std::set<std::shared_ptr<Aidge::Node>> Aidge::getConvHorizontalTiling(const std:
         clonedInputs[1] -> addChild(newNode, 0, 1);
         clonedInputs[2] -> addChild(newNode, 0, 2);
         // Slice for input and each parameter
-        std::vector<std::int32_t> inputDimsEnd(inputDims[0].first.size());
+        std::vector<std::int64_t> inputDimsEnd(inputDims[0].first.size());
         for (std::size_t dim = 0; dim < inputDimsEnd.size(); ++dim) {
-            inputDimsEnd[dim] = static_cast<std::int32_t>(inputDims[0].first[dim] + inputDims[0].second[dim]) - 1;
+            inputDimsEnd[dim] = static_cast<std::int64_t>(inputDims[0].first[dim] + inputDims[0].second[dim]) - 1;
         }
-        std::vector<std::int32_t> inputDimsStart(inputDims[0].first.size());
+        std::vector<std::int64_t> inputDimsStart(inputDims[0].first.size());
         for (std::size_t dim = 0; dim < inputDimsStart.size(); ++dim) {
-            inputDimsStart[dim] = static_cast<std::int32_t>(inputDims[0].first[dim]);
+            inputDimsStart[dim] = static_cast<std::int64_t>(inputDims[0].first[dim]);
         }
-        std::vector<std::int32_t> usedDims(inputDimsEnd.size());
-        std::iota(usedDims.begin(), usedDims.end(), static_cast<std::int32_t>(0));
+        std::vector<std::int64_t> usedDims(inputDimsEnd.size());
+        std::iota(usedDims.begin(), usedDims.end(), static_cast<std::int64_t>(0));
         auto slice = Slice(inputDimsStart, inputDimsEnd, usedDims, "Slice_" + std::to_string(currentFirstDims[axis]));
         slice -> addChild(newNode, 0, 0);
         newNode -> addChild(concat, 0, i);