/******************************************************************************** * Copyright (c) 2023 CEA-List * * This program and the accompanying materials are made available under the * terms of the Eclipse Public License 2.0 which is available at * http://www.eclipse.org/legal/epl-2.0. * * SPDX-License-Identifier: EPL-2.0 * ********************************************************************************/ #include "aidge/data/Tensor.hpp" #include <cstddef> #include <vector> #include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/operator/Add.hpp" #include "aidge/operator/Div.hpp" #include "aidge/operator/Mul.hpp" #include "aidge/operator/Sub.hpp" #include "aidge/operator/Transpose.hpp" #include "aidge/utils/Types.h" /** * @brief Transposition operation * * @return Tensor */ Aidge::Tensor Aidge::Tensor::transpose(const std::vector<Aidge::DimSize_t> &outputDimsOrder) const { auto transpose_ = Aidge::Transpose_Op(outputDimsOrder); transpose_.associateInput(0, std::make_shared<Tensor>(*this)); transpose_.setDataType(dataType()); transpose_.setBackend(mImpl->backend()); transpose_.forward(); // using add_backend = std::remove_reference_t<decltype(*Registrar<Add_Op>::create("cpu")(std::declval<const Add_Op&>()))>; return transpose_.getOutput(0)->clone(); } /** * @brief Element-wise addition operation for two ``Tensor``s. * @note ``Tensor``s should be stored on the same backend. * @todo If input ``Tensor``s have a different dataType, the output should * have the dataType of the ``Tensor`` with the highest precision. * * @param other * @return Tensor */ Aidge::Tensor Aidge::Tensor::operator+(const Aidge::Tensor& other) const { AIDGE_ASSERT(hasImpl() && other.hasImpl(), "At least one Tensor cannot perform any binary operation because it has no implementation."); AIDGE_ASSERT(mImpl->backend() == other.mImpl->backend(), "Tensors must have the same backend"); AIDGE_ASSERT(dataType() == other.dataType(), "Tensors must have the same data type"); AIDGE_ASSERT(dataFormat() == other.dataFormat(), "Tensors must have the same data format"); auto add_ = Add_Op(2); add_.associateInput(0, std::make_shared<Tensor>(*this)); add_.associateInput(1, std::make_shared<Tensor>(other)); add_.setDataType(dataType()); add_.setDataFormat(dataFormat()); add_.setBackend(mImpl->backend()); add_.forward(); // using add_backend = std::remove_reference_t<decltype(*Registrar<Add_Op>::create("cpu")(std::declval<const Add_Op&>()))>; return add_.getOutput(0)->clone(); } /** * @brief Element-wise substraction operation for two ``Tensor``s. * @note ``Tensor``s should be stored on the same backend. * @todo If input ``Tensor``s have a different dataType, the output should * have the dataType of the ``Tensor`` with the highest precision. * * @param other * @return Tensor */ Aidge::Tensor Aidge::Tensor::operator-(const Aidge::Tensor& other) const { AIDGE_ASSERT(hasImpl() && other.hasImpl(), "At least one Tensor cannot perform any binary operation because it has no implementation."); AIDGE_ASSERT(mImpl->backend() == other.mImpl->backend(), "Tensors must have the same backend"); AIDGE_ASSERT(dataType() == other.dataType(), "Tensors must have the same data type"); AIDGE_ASSERT(dataFormat() == other.dataFormat(), "Tensors must have the same data format"); auto sub_ = Sub_Op(); sub_.associateInput(0, std::make_shared<Tensor>(*this)); sub_.associateInput(1, std::make_shared<Tensor>(other)); sub_.setDataType(dataType()); sub_.setDataFormat(dataFormat()); sub_.setBackend(mImpl->backend()); sub_.forward(); // using add_backend = std::remove_reference_t<decltype(*Registrar<Add_Op>::create("cpu")(std::declval<const Add_Op&>()))>; return sub_.getOutput(0)->clone(); } /** * @brief Element-wise multiplication operation for two ``Tensor``s. * @note ``Tensor``s should be stored on the same backend. * @todo If input ``Tensor``s have a different dataType, the output should * have the dataType of the ``Tensor`` with the highest precision. * * @param other * @return Tensor */ Aidge::Tensor Aidge::Tensor::operator*(const Aidge::Tensor& other) const { AIDGE_ASSERT(hasImpl() && other.hasImpl(), "At least one Tensor cannot perform any binary operation because it has no implementation."); AIDGE_ASSERT(mImpl->backend() == other.mImpl->backend(), "Tensors must have the same backend"); AIDGE_ASSERT(dataType() == other.dataType(), "Tensors must have the same data type"); AIDGE_ASSERT(dataFormat() == other.dataFormat(), "Tensors must have the same data format"); auto mul_ = Mul_Op(); mul_.associateInput(0, std::make_shared<Tensor>(*this)); mul_.associateInput(1, std::make_shared<Tensor>(other)); mul_.setDataType(dataType()); mul_.setDataFormat(dataFormat()); mul_.setBackend(mImpl->backend()); mul_.forward(); // using add_backend = std::remove_reference_t<decltype(*Registrar<Add_Op>::create("cpu")(std::declval<const Add_Op&>()))>; return mul_.getOutput(0)->clone(); } Aidge::Tensor Aidge::Tensor::operator/(const Aidge::Tensor& other) const { AIDGE_ASSERT(hasImpl() && other.hasImpl(), "At least one Tensor cannot perform any binary operation because it has no implementation."); AIDGE_ASSERT(mImpl->backend() == other.mImpl->backend(), "Tensors must have the same backend"); AIDGE_ASSERT(dataType() == other.dataType(), "Tensors must have the same data type"); AIDGE_ASSERT(dataFormat() == other.dataFormat(), "Tensors must have the same data format"); auto div_ = Div_Op(); div_.associateInput(0, std::make_shared<Tensor>(*this)); div_.associateInput(1, std::make_shared<Tensor>(other)); div_.setDataType(dataType()); div_.setDataFormat(dataFormat()); div_.setBackend(mImpl->backend()); div_.forward(); // using add_backend = std::remove_reference_t<decltype(*Registrar<Add_Op>::create("cpu")(std::declval<const Add_Op&>()))>; return div_.getOutput(0)->clone(); } Aidge::Tensor& Aidge::Tensor::operator=(const Aidge::Tensor& other) { if (this == &other) { return *this; } resize(other.dims(), other.strides()); setDataType(other.dataType(), false); // do not convert existing data if (other.hasImpl()) { if (hasImpl()) { copyFrom(other); } else { // Perform a shallow copy only setImpl(other.mImpl, other.mImplOffset); } } else { setImpl(nullptr); } return *this; } Aidge::Tensor::~Tensor() noexcept = default; void Aidge::Tensor::resize(const std::vector<Aidge::DimSize_t>& dims, std::vector<Aidge::DimSize_t> strides) { // TODO: scalar Tensor not handled if (dims.empty()) { // scalar mDims = std::vector<DimSize_t>(0); mStrides = std::vector<DimSize_t>({1}); mContiguous = true; computeSize(); if (mImpl) { mImpl->resize(mDims); } return; } bool checkContiguous = true; if (strides.empty()) { strides.resize(dims.size()); size_t expectedStride = 1; for (int dim = dims.size() - 1; dim >= 0; --dim) { strides[dim] = expectedStride; expectedStride *= dims[dim]; } checkContiguous = false; } else { AIDGE_ASSERT(strides.size() == dims.size(), "Number of strides must match number of dims"); } if (mImpl && mImpl.use_count() > 1) { // Here we could also create a new storage for this tensor in this case // But, is it more likely that the user really wants this, or that he // did a mistake? AIDGE_ASSERT(dims == mDims && strides == mStrides, "Cannot resize Tensor with shared storage"); } else { mDims = dims; mStrides = strides; mContiguous = true; if (checkContiguous) { std::size_t expectedStride = 1; // std::size_t i = dims.size(); // while ((i-- > 0) && (strides[i] == expectedStride)) { // mContiguous&= (strides[i] == expectedStride); // expectedStride*= dims[i]; // } for (std::size_t i = dims.size() - 1; i > 0; --i) { if (strides[i] != expectedStride) { mContiguous = false; break; } expectedStride *= dims[i]; } mContiguous &= (strides[0] == expectedStride); } computeSize(); if (mImpl) { mImpl->resize(mDims); } } } std::string Aidge::Tensor::toString() const { AIDGE_ASSERT( mImpl && (dims().empty() || (dims() == std::vector<DimSize_t>({0})) || (mImpl->hostPtr() != nullptr)), "tensor should have a valid host pointer"); // TODO: move lambda elsewhere? auto ptrToString = [](DataType dt, void* ptr, std::size_t idx) { switch (dt) { case DataType::Float64: return std::to_string(static_cast<double*>(ptr)[idx]); case DataType::Float32: return std::to_string(static_cast<float*>(ptr)[idx]); case DataType::Float16: return std::to_string(static_cast<half_float::half*>(ptr)[idx]); case DataType::Int8: return std::to_string(static_cast<int8_t*>(ptr)[idx]); case DataType::Int16: return std::to_string(static_cast<int16_t*>(ptr)[idx]); case DataType::Int32: return std::to_string(static_cast<int32_t*>(ptr)[idx]); case DataType::Int64: return std::to_string(static_cast<int64_t*>(ptr)[idx]); case DataType::UInt8: return std::to_string(static_cast<uint8_t*>(ptr)[idx]); case DataType::UInt16: return std::to_string(static_cast<uint16_t*>(ptr)[idx]); case DataType::UInt32: return std::to_string(static_cast<uint32_t*>(ptr)[idx]); case DataType::UInt64: return std::to_string(static_cast<uint64_t*>(ptr)[idx]); default: AIDGE_ASSERT(true, "unsupported type to convert to string"); } return std::string("?"); // To make Clang happy }; if (dims().empty()) { return ptrToString(mDataType, mImpl->hostPtr(), 0); } std::string res; std::size_t dim = 0; std::size_t counter = 0; if (nbDims() >= 2) { std::vector<std::size_t> dimVals(nbDims(), 0); res += "{\n"; while (counter < mSize) { std::string spaceString = std::string((dim + 1) << 1, ' '); if (dim < nbDims() - 2) { if (dimVals[dim] == 0) { res += spaceString + "{\n"; ++dim; } else if (dimVals[dim] < static_cast<std::size_t>(dims()[dim])) { res += spaceString + "},\n" + spaceString + "{\n"; ++dim; } else { res += spaceString + "}\n"; dimVals[dim--] = 0; dimVals[dim]++; } } else { for (; dimVals[dim] < static_cast<std::size_t>(dims()[dim]); ++dimVals[dim]) { res += spaceString + "{"; for (DimSize_t j = 0; j < dims()[dim + 1] - 1; ++j) { res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), counter++) + ","; } res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), counter++) + "}"; if (dimVals[dim] < static_cast<std::size_t>(dims()[dim] - 1)) { res += ","; } res += "\n"; } if (dim == 0) { break; } dimVals[dim--] = 0; dimVals[dim]++; } } if (nbDims() != 2) { // If nbDims == 2, parenthesis is already closed for (int i = static_cast<int>(dim); i >= 0; --i) { res += std::string((i + 1) << 1, ' ') + "}\n"; } } } else { res += "{"; for (DimSize_t j = 0; j < dims()[0]; ++j) { res += " " + ptrToString(mDataType, mImpl->hostPtr(mImplOffset), j) + ((j < dims()[0] - 1) ? "," : " "); } } res += "}"; return res; } Aidge::Tensor Aidge::Tensor::extract( const std::vector<std::size_t>& fixedCoord) const { AIDGE_ASSERT(isContiguous(), "Tensor must be contiguous"); AIDGE_ASSERT(fixedCoord.size() <= mDims.size(), "Number of coordinates is higher than number of dimensions"); Tensor subTensor(mDataType); subTensor.resize( std::vector<size_t>(mDims.cbegin() + fixedCoord.size(), mDims.cend()), std::vector<size_t>(mStrides.cbegin() + fixedCoord.size(), mStrides.cend())); subTensor.setBackend(mImpl->backend(), mImpl->device().second); subTensor.setImpl(mImpl, mImplOffset + getStorageIdx(fixedCoord)); return subTensor; } Aidge::Tensor Aidge::Tensor::extract( const std::vector<std::size_t>& startCoord, const std::vector<std::size_t>& dims) const { AIDGE_ASSERT(isContiguous(), "Tensor must be contiguous"); AIDGE_ASSERT(startCoord.size() == mDims.size(), "Coordinates does not match number of dimensions"); Tensor subTensor(mDataType); subTensor.resize(dims, mStrides); subTensor.setBackend(mImpl->backend(), mImpl->device().second); subTensor.setImpl(mImpl, mImplOffset + getStorageIdx(startCoord)); return subTensor; } void Aidge::Tensor::makeContiguous() { if (!mImpl || isContiguous()) { return; } // Block so that mImpl ref count is 1 for resize() { // Create a new storage that will be contiguous std::shared_ptr<TensorImpl> newImpl = Registrar<Tensor>::create( {mImpl->backend(), mDataType})(mImpl->device().second, mDims); // Copy elements from old to new storage std::size_t idx = 0; while (idx < mSize) { const std::size_t storageIdx = getStorageIdx(getCoord(idx)); // Determine the size of the contiguous chunk std::size_t copySize = 1; while (idx + copySize < mSize && getStorageIdx(getCoord(idx + copySize)) == storageIdx + copySize) { ++copySize; } // Perform a single copy for the contiguous chunk newImpl->copy(mImpl->rawPtr(mImplOffset + storageIdx), copySize, idx); // Move to the next index after the contiguous chunk idx += copySize; } // Replace old storage by new, contiguous, storage setImpl(newImpl); } // Resize tensor without strides => tensor is now contiguous resize(mDims); } void Aidge::Tensor::copyCast(const Tensor& src) { if (&src == this) { return; } AIDGE_ASSERT(src.isContiguous(), "cannot copy-cast non-contiguous tensor"); // Current Tensor has necessarily a data type, but may not have backend if (!hasImpl()) { // If no backend was set for the current tensor, use the same as src const auto deviceSrc = src.getImpl()->device(); setBackend(deviceSrc.first, deviceSrc.second); } resize(src.dims()); AIDGE_ASSERT(src.getImpl()->device() == getImpl()->device(), "cannot copy-cast from a different backend/device"); getImpl()->copyCast(src.getImpl()->rawPtr(src.mImplOffset), src.dataType(), src.size(), mImplOffset); } void Aidge::Tensor::copyFrom(const Tensor& src) { if (&src == this) { return; } AIDGE_ASSERT(src.isContiguous(), "cannot copy from non-contiguous tensor"); // Current Tensor has necessarily a data type, but may not have backend if (!hasImpl()) { // If no backend was set for the current tensor, use the same as src const auto deviceSrc = src.getImpl()->device(); setBackend(deviceSrc.first, deviceSrc.second); } resize(src.dims()); AIDGE_ASSERT(src.dataType() == dataType(), "cannot copy from a different data type"); getImpl()->copyFrom(*(src.getImpl()), src.size(), src.mImplOffset, mImplOffset); } void Aidge::Tensor::copyTranspose(const Tensor& src, const std::vector<DimSize_t>& transpose) { std::vector<DimSize_t> newDims; for (std::size_t i = 0; i < src.dims().size(); ++i) { newDims.push_back(src.dims()[transpose[i]]); } std::vector<std::size_t> newStrides(newDims.size(), 1); for (size_t i = 0; i < newDims.size(); ++i) { for (size_t j = i + 1; j < newDims.size(); ++j) { newStrides[i] *= newDims[j]; } } std::shared_ptr<TensorImpl> newImpl = Registrar<Tensor>::create({mImpl->backend(), mDataType})(mImpl->device().second, newDims); std::vector<size_t> indices(newDims.size(), 0); for (size_t i = 0; i < src.size(); ++i) { size_t idx = 0; // Permute indices based on OutputDimsOrder attr for (int j = newDims.size() -1; j >=0; --j) { idx += indices[transpose[j]] * newStrides[j]; } // Copy the value in output newImpl->copy(src.getImpl()->rawPtr(i), 1, idx); // Update indices for the next iteration for (int j = newDims.size() - 1; j >= 0; --j) { if (indices[j] < src.dims()[j] - 1) { indices[j]++; break; } else { indices[j] = 0; } } } resize(newDims); setImpl(newImpl); } void Aidge::Tensor::copyTranspose(const Tensor& src, const DataFormatTranspose& transpose) { copyTranspose(src, std::vector<DimSize_t>(transpose.begin(), transpose.end())); } void Aidge::Tensor::copyCastFrom(const Tensor& src, std::shared_ptr<Tensor>& movedSrcPtr) { if (&src == this) { return; } AIDGE_ASSERT(src.isContiguous(), "cannot copy-cast from non-contiguous tensor"); // Current Tensor has necessarily a data type, but may not have backend if (!getImpl()) { // If no backend was set for the current tensor, use the same as src const auto deviceSrc = src.getImpl()->device(); setBackend(deviceSrc.first, deviceSrc.second); } resize(src.dims()); if (dataType() != src.dataType()) { // First move data to the target device (only if needed) const auto device = getImpl()->device(); const Tensor& movedSrc = src.refFrom(movedSrcPtr, device.first, device.second); // Second, copy-cast data (necessary) getImpl()->copyCast(movedSrc.getImpl()->rawPtr(movedSrc.mImplOffset), movedSrc.dataType(), movedSrc.size(), mImplOffset); } else { // Directly copy, no conversion necessary // Avoid making a double copy if both data type and device are the same getImpl()->copyFrom(*(src.getImpl()), src.size(), src.mImplOffset, mImplOffset); } } Aidge::Tensor& Aidge::Tensor::refContiguous(std::shared_ptr<Tensor>& fallback) { // Scott Meyers' solution to avoid code duplication return const_cast<Tensor&>( static_cast<const Tensor&>(*this).refContiguous(fallback)); } const Aidge::Tensor& Aidge::Tensor::refContiguous( std::shared_ptr<Tensor>& fallback) const { AIDGE_ASSERT(getImpl(), "no backend was set for tensor, cannot refCast() it"); if (isContiguous()) { return *this; } else { if (this != fallback.get()) { // Shallow copy to fallback *fallback = *this; } // Make fallback contiguous fallback->makeContiguous(); return *fallback; } } Aidge::Tensor& Aidge::Tensor::refCast(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt) { // Scott Meyers' solution to avoid code duplication return const_cast<Tensor&>( static_cast<const Tensor&>(*this).refCast(fallback, dt)); } const Aidge::Tensor& Aidge::Tensor::refCast(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt) const { AIDGE_ASSERT(getImpl(), "no backend was set for tensor, cannot refCast() it"); if (dt == dataType()) { return *this; } else { if (this == fallback.get()) { // if refFrom() was called before, just change the type fallback->setDataType(dt); } else { AIDGE_ASSERT(isContiguous(), "cannot refCast non-contiguous tensor"); if (!fallback) { fallback = std::make_shared<Tensor>(dt); } else { fallback->setDataType( dt, false); // don't keep previous data (no copy) } const auto device = getImpl()->device(); fallback->setBackend(device.first, device.second, false); // don't keep previous data (no copy) fallback->resize(dims()); fallback->getImpl()->copyCast(getImpl()->rawPtr(mImplOffset), dataType(), size(), fallback->mImplOffset); } return *fallback; } } Aidge::Tensor& Aidge::Tensor::refFrom(std::shared_ptr<Tensor>& fallback, const std::string& backend, DeviceIdx_t device) { // Scott Meyers' solution to avoid code duplication return const_cast<Tensor&>( static_cast<const Tensor&>(*this).refFrom(fallback, backend, device)); } const Aidge::Tensor& Aidge::Tensor::refFrom(std::shared_ptr<Tensor>& fallback, const std::string& backend, DeviceIdx_t device) const { AIDGE_ASSERT(getImpl(), "no backend was set for tensor, cannot refFrom() it"); if (std::make_pair(backend, device) == getImpl()->device()) { return *this; } else { if (this == fallback.get()) { // if refCast() was called before, just change the backend fallback->setBackend(backend, device); } else { AIDGE_ASSERT(isContiguous(), "cannot refFrom non-contiguous tensor"); if (!fallback) { fallback = std::make_shared<Tensor>(dataType()); } else { fallback->setDataType( dataType(), false); // don't keep previous data (no copy) } fallback->setBackend(backend, device, false); // don't keep previous data (no copy) fallback->resize(dims()); fallback->getImpl()->copyFrom(*getImpl(), size(), mImplOffset, fallback->mImplOffset); } return *fallback; } } Aidge::Tensor& Aidge::Tensor::ref(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt, const std::string& backend, DeviceIdx_t device) { // Scott Meyers' solution to avoid code duplication return const_cast<Tensor&>( static_cast<const Tensor&>(*this).ref(fallback, dt, backend, device)); } const Aidge::Tensor& Aidge::Tensor::ref(std::shared_ptr<Tensor>& fallback, const Aidge::DataType& dt, const std::string& backend, DeviceIdx_t device) const { AIDGE_ASSERT(getImpl(), "no backend was set for tensor, cannot ref() it"); if (dt == dataType() && std::make_pair(backend, device) == getImpl()->device()) { return *this; } else { // Change fallback type, backend & device, without any data copy if (!fallback) { fallback = std::make_shared<Tensor>(dt); } else { fallback->setDataType(dt, false); // don't keep previous data (no copy) } fallback->setBackend(backend, device, false); // don't keep previous data (no copy) fallback->resize(dims()); return *fallback; } } std::set<std::string> Aidge::Tensor::getAvailableBackends() { std::set<std::string> backendsList; for (const auto& tupleKey : Registrar<Tensor>::getKeys()) backendsList.insert(std::get<0>(tupleKey)); return backendsList; }