Skip to content
Snippets Groups Projects
Commit cb7c3c4d authored by Maxence Naud's avatar Maxence Naud Committed by Maxence Naud
Browse files

Reduce 'cpu/TensorImpl.cpp' compiled obj size from 545KB to 504KB

parent 48a0df4a
No related branches found
No related tags found
3 merge requests!414Update version 0.5.1 -> 0.6.0,!408[Add] Dropout Operator,!361Move code from header to source
......@@ -17,7 +17,6 @@
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/future_std/span.hpp"
namespace Aidge {
......@@ -50,42 +49,24 @@ public:
void zeros() override final;
// Copy `length` elements of type T from host memory `src` into this tensor,
// starting at element `offset`. Asserts on capacity overflow and on overlap.
void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cpu<{}>::copy(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
const T* srcT = static_cast<const T *>(src);
T* dstT = static_cast<T *>(rawPtr(offset));
// std::copy_n requires non-overlapping ranges: reject dst inside [src, src+length).
AIDGE_ASSERT(dstT < srcT || dstT >= srcT + length, "TensorImpl_cpu<{}>::copy(): overlapping copy is not supported", typeid(T).name());
std::copy_n(srcT, length, dstT);
}
void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final;
void copyCast(const void *src, const DataType srcDt, NbElts_t length, NbElts_t offset = 0) override final;
// Copy from another device's buffer. For the CPU backend the only legal source
// "device" is (Backend, 0), so this degenerates to a plain host copy().
void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) override final {
AIDGE_ASSERT(device.first == Backend, "TensorImpl_cpu<{}>::copyFromDevice(): backend must match", typeid(T).name());
AIDGE_ASSERT(device.second == 0, "TensorImpl_cpu<{}>::copyFromDevice(): device ({}) cannot be != 0 for CPU backend", typeid(T).name(), device.second);
copy(src, length, offset);
}
void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) override final;
// Host memory IS device memory for the CPU backend: forward to copy().
inline void copyFromHost(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
copy(src, length, offset);
}
// Copy `length` elements starting at element `offset` out of this tensor into
// host buffer `dst`.
// Fixes: bounds assertion now runs BEFORE rawPtr(offset) is computed (the
// pointer arithmetic itself was out-of-range on bad input), and the error
// message now names copyToHost() instead of copy().
void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const override final {
AIDGE_ASSERT(offset + length <= mData.size(), "TensorImpl_cpu<{}>::copyToHost(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mData.size());
const T* src = static_cast<const T*>(rawPtr(offset));
std::copy(src, src + length, static_cast<T *>(dst));
}
void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const override final;
// Mutable raw pointer to element `offset`; allocates the backing storage
// on first use (lazyInit) so writes through it are always valid up to mNbElts.
void *rawPtr(NbElts_t offset = 0) override final {
lazyInit();
return (mData.data() + offset);
};
// Const raw pointer to element `offset`. Cannot lazily allocate, so the
// storage must already hold at least mNbElts elements.
const void *rawPtr(NbElts_t offset = 0) const override final {
AIDGE_ASSERT(mData.size() >= mNbElts, "TensorImpl_cpu<{}>::rawPtr(): accessing uninitialized const rawPtr", typeid(T).name());
return (mData.data() + offset);
};
const void *rawPtr(NbElts_t offset = 0) const override final;
void *hostPtr(NbElts_t offset = 0) override final {
lazyInit();
......@@ -97,23 +78,12 @@ public:
return (mData.data() + offset);
};
// Adopt an externally-owned buffer of `length` elements as this tensor's
// storage. Ownership is NOT taken: mDataOwner is released, so lazyInit()
// will refuse to enlarge this non-owned data later.
void setRawPtr(void *ptr, NbElts_t length) override final {
AIDGE_ASSERT(length >= mNbElts, "TensorImpl_cpu<{}>::setRawPtr(): trying to set raw pointer (length: {}) of insufficient capacity (required: {})", typeid(T).name(), length, mNbElts);
mData = future_std::span<T>(static_cast<T *>(ptr), length);
mDataOwner.reset();
};
void setRawPtr(void *ptr, NbElts_t length) override final;
virtual ~TensorImpl_cpu() = default;
private:
// Ensure the backing storage holds at least mNbElts elements, allocating
// (and taking ownership) on demand. Existing contents are NOT preserved
// across the re-allocation. Growing externally-owned data is forbidden.
void lazyInit() {
if (mData.size() < mNbElts) {
// Need more data, a re-allocation will occur
AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "TensorImpl_cpu<{}>: trying to enlarge non-owned data", typeid(T).name());
mDataOwner.reset(new T[mNbElts]);
mData = future_std::span<T>(mDataOwner.get(), mNbElts);
}
}
void lazyInit();
};
......
......@@ -21,9 +21,28 @@
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
 * @brief Sanity checks shared by the byte-wise copy helpers.
 *
 * @param src Source buffer.
 * @param dst Destination buffer.
 * @param nbBytes Number of bytes about to be copied (callers pass count * sizeof(T)).
 * @param capacity Remaining capacity of the destination, in bytes.
 *
 * Fix: std::memcpy requires the ranges to be fully disjoint. The previous
 * check only rejected dst inside [src, src+n) and missed the symmetric case
 * (dst < src but dst + n > src); both directions are rejected now.
 */
static void copyAssertions(const void* src, void* dst, std::size_t nbBytes, std::size_t capacity) {
    const char* const srcC = static_cast<const char*>(src);
    const char* const dstC = static_cast<const char*>(dst);
    AIDGE_ASSERT((dstC + nbBytes <= srcC) || (srcC + nbBytes <= dstC), "Overlapping copy is not supported");
    AIDGE_ASSERT(nbBytes <= capacity, "Number of elements to copy is above implementation current capacity ({}B > {}B)", nbBytes, capacity);
}
/**
 * @brief Raw byte-wise copy from src to dst, after bounds/overlap assertions.
 *
 * @param src Source array from where data are copied.
 * @param dst Destination array where data are copied to.
 * @param nbElts Number of BYTES to copy from src to dst (callers pass count * sizeof(T)).
 * @param capacity Size of the dst array, in bytes.
 */
static void copyCharData(const void* src, void* dst, std::size_t nbElts, std::size_t capacity) {
    copyAssertions(src, dst, nbElts, capacity);
    std::memcpy(dst, src, nbElts);
}
template <typename T>
bool Aidge::TensorImpl_cpu<T>::operator==(const Aidge::TensorImpl &other) const {
bool TensorImpl_cpu<T>::operator==(const TensorImpl &other) const {
const auto& typedOtherImpl = reinterpret_cast<const TensorImpl_cpu<T>&>(other);
AIDGE_INTERNAL_ASSERT(typedOtherImpl.size() >= mNbElts);
......@@ -37,12 +56,29 @@ bool Aidge::TensorImpl_cpu<T>::operator==(const Aidge::TensorImpl &other) const
}
/**
 * @brief Zero-fill the whole tensor storage, allocating it if needed via rawPtr().
 *
 * Byte-wise zeroing: valid for arithmetic T where the all-zero bit pattern
 * represents T(0). Fixes: removed the duplicated signature line left over
 * from the diff, and pass plain 0 to memset (its value parameter is an int
 * byte value, not a T).
 */
template <typename T>
void TensorImpl_cpu<T>::zeros() {
    std::memset(rawPtr(), 0, mNbElts * sizeof(T));
}
/**
 * @brief Copy `length` elements of type T from `src` into this tensor,
 * starting at element `offset`.
 *
 * Fixes: removed the stray leftover `copyCast` signature line from the diff,
 * and guard `offset` first — `(mNbElts - offset)` is unsigned, so for
 * offset > mNbElts it wraps to a huge value and the capacity assertion in
 * copyCharData() would never fire.
 */
template <typename T>
void TensorImpl_cpu<T>::copy(const void *src, NbElts_t length, NbElts_t offset) {
    AIDGE_ASSERT(offset <= mNbElts, "TensorImpl_cpu<{}>::copy(): offset ({}) is above capacity ({})", typeid(T).name(), offset, mNbElts);
    copyCharData(src, rawPtr(offset), length*sizeof(T), (mNbElts - offset)*sizeof(T));
}
/**
 * @brief Copy from another device's buffer into this CPU tensor.
 *
 * The only device a CPU implementation can read from is ("cpu", 0), i.e.
 * itself: after validating the descriptor this is a plain host copy().
 */
template <typename T>
void TensorImpl_cpu<T>::copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset) {
    const auto& backendName = device.first;
    const auto deviceId = device.second;
    AIDGE_ASSERT(backendName == Backend, "TensorImpl_cpu<{}>::copyFromDevice(): backend must match", typeid(T).name());
    AIDGE_ASSERT(deviceId == 0, "TensorImpl_cpu<{}>::copyFromDevice(): device ({}) cannot be != 0 for CPU backend", typeid(T).name(), deviceId);
    copy(src, length, offset);
}
/**
 * @brief Copy `length` elements starting at element `offset` out of this
 * tensor into host buffer `dst`.
 *
 * Fix: guard `offset` first — `(mData.size() - offset)` is unsigned, so for
 * offset > mData.size() it wraps to a huge value and the capacity assertion
 * in copyCharData() would never fire.
 */
template <typename T>
void TensorImpl_cpu<T>::copyToHost(void *dst, NbElts_t length, NbElts_t offset) const {
    AIDGE_ASSERT(offset <= mData.size(), "TensorImpl_cpu<{}>::copyToHost(): offset ({}) is above capacity ({})", typeid(T).name(), offset, mData.size());
    copyCharData(rawPtr(offset), dst, length*sizeof(T), (mData.size() - offset)*sizeof(T));
}
template <typename T>
void TensorImpl_cpu<T>::copyCast(const void *src, const DataType srcDt, NbElts_t length, NbElts_t offset) {
if (length == 0) {
return;
}
......@@ -52,8 +88,7 @@ void Aidge::TensorImpl_cpu<T>::copyCast(const void *src, const Aidge::DataType s
switch (srcDt)
{
case DataType::Float64:
std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length,
dstT);
std::copy_n(static_cast<const double*>(src), length, dstT);
break;
case DataType::Float32:
std::copy(static_cast<const float*>(src), static_cast<const float*>(src) + length,
......@@ -155,4 +190,29 @@ void Aidge::TensorImpl_cpu<T>::copyCast(const void *src, const Aidge::DataType s
AIDGE_THROW_OR_ABORT(std::runtime_error, "Unsupported data type.");
break;
}
}
\ No newline at end of file
}
/**
 * @brief Const raw pointer to element `offset`.
 *
 * The const overload cannot lazily allocate, so the storage must already
 * hold at least mNbElts elements.
 * Fix: dropped the stray `;` after the function body (an empty declaration
 * at namespace scope, flagged by -Wextra-semi).
 */
template <typename T>
const void* TensorImpl_cpu<T>::rawPtr(NbElts_t offset) const {
    AIDGE_ASSERT(mData.size() >= mNbElts, "TensorImpl_cpu<{}>::rawPtr(): accessing uninitialized const rawPtr", typeid(T).name());
    return (mData.data() + offset);
}
/**
 * @brief Adopt an externally-owned buffer of `length` elements as storage.
 *
 * Ownership is NOT taken: mDataOwner is released, so lazyInit() will refuse
 * to enlarge this non-owned data later.
 */
template <typename T>
void TensorImpl_cpu<T>::setRawPtr(void *ptr, NbElts_t length) {
    AIDGE_ASSERT(length >= mNbElts, "TensorImpl_cpu<{}>::setRawPtr(): trying to set raw pointer (length: {}) of insufficient capacity (required: {})", typeid(T).name(), length, mNbElts);
    // View the caller's buffer, then drop any buffer we previously owned.
    T* const typedPtr = static_cast<T*>(ptr);
    mData = future_std::span<T>(typedPtr, length);
    mDataOwner.reset();
}
/**
 * @brief Ensure the backing storage holds at least mNbElts elements,
 * allocating (and taking ownership) on demand.
 *
 * Existing contents are not preserved across the re-allocation. Enlarging
 * externally-owned storage (set via setRawPtr) is forbidden.
 */
template <typename T>
void TensorImpl_cpu<T>::lazyInit() {
    if (mData.size() >= mNbElts) {
        return; // current buffer is already large enough
    }
    // Need more data, a re-allocation will occur
    AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "TensorImpl_cpu<{}>: trying to enlarge non-owned data", typeid(T).name());
    mDataOwner.reset(new T[mNbElts]);
    mData = future_std::span<T>(mDataOwner.get(), mNbElts);
}
} // namespace Aidge
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment