Skip to content
Snippets Groups Projects
Commit f4c43da9 authored by Houssem ROUIS's avatar Houssem ROUIS
Browse files

Merge branch 'master' of...

Merge branch 'master' of https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu into vit_operators
parents 764923ca cc621da3
No related branches found
No related tags found
2 merge requests!50version 0.2.0,!20Vit operators
Showing
with 968 additions and 299 deletions
......@@ -136,70 +136,70 @@ build:ubuntu_python:
paths:
- venv/
build:windows_cpp:
stage: build
needs: []
tags:
- windows
image: buildtools
before_script:
# Install Chocolatey
- Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
# Install dependencies
- choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
- choco install git -Y
- choco install python -Y
# Update PATH
- $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
script:
# Download dependencies
# aidge_core
- 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip'
- Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
- Remove-Item .\build_cpp\ -Recurse
- $env:CMAKE_PREFIX_PATH = '../install_cpp'
- mkdir -p build_cpp
- cd build_cpp
- cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
- cmake --build . -j2
- cmake --install . --config Debug
artifacts:
expire_in: 1 week
paths:
- build_cpp/
- install_cpp/
build:windows_python:
stage: build
needs: []
tags:
- windows
image: buildtools
before_script:
# Install Chocolatey
- Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
# Install dependencies
- choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
- choco install git -Y
- choco install python -Y
# Update PATH
- $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
script:
# Download dependencies
# aidge_core (Python)
- 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip'
- Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
- python -m pip install virtualenv
- virtualenv venv
- venv\Scripts\Activate.ps1
- python -m pip install -r requirements.txt
- python -m pip install .
artifacts:
expire_in: 1 week
paths:
- venv/
# build:windows_cpp:
# stage: build
# needs: []
# tags:
# - windows
# image: buildtools
# before_script:
# # Install Chocolatey
# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
# # Install dependencies
# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
# - choco install git -Y
# - choco install python -Y
# # Update PATH
# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
# script:
# # Download dependencies
# # aidge_core
# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip'
# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
# - Remove-Item .\build_cpp\ -Recurse
# - $env:CMAKE_PREFIX_PATH = '../install_cpp'
# - mkdir -p build_cpp
# - cd build_cpp
# - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
# - cmake --build . -j2
# - cmake --install . --config Debug
# artifacts:
# expire_in: 1 week
# paths:
# - build_cpp/
# - install_cpp/
# build:windows_python:
# stage: build
# needs: []
# tags:
# - windows
# image: buildtools
# before_script:
# # Install Chocolatey
# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
# # Install dependencies
# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
# - choco install git -Y
# - choco install python -Y
# # Update PATH
# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
# script:
# # Download dependencies
# # aidge_core (Python)
# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip'
# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
# - python -m pip install virtualenv
# - virtualenv venv
# - venv\Scripts\Activate.ps1
# - python -m pip install -r requirements.txt
# - python -m pip install .
# artifacts:
# expire_in: 1 week
# paths:
# - venv/
......@@ -27,23 +27,23 @@ test:ubuntu_python:
reports:
junit: ${CI_PROJECT_NAME}/xmlrunner-results.xml
test:windows_cpp:
stage: test
needs: ["build:windows_cpp"]
tags:
- windows
image: buildtools
before_script:
# Install Chocolatey
- Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
# Install dependencies
- choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
- choco install python -Y
# Update PATH
- $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
script:
- cd build_cpp
- ctest --output-junit ctest-results.xml --output-on-failure
artifacts:
reports:
junit: build_cpp/ctest-results.xml
# test:windows_cpp:
# stage: test
# needs: ["build:windows_cpp"]
# tags:
# - windows
# image: buildtools
# before_script:
# # Install Chocolatey
# - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
# # Install dependencies
# - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
# - choco install python -Y
# # Update PATH
# - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
# script:
# - cd build_cpp
# - ctest --output-junit ctest-results.xml --output-on-failure
# artifacts:
# reports:
# junit: build_cpp/ctest-results.xml
......@@ -16,7 +16,7 @@ class test_tensor(unittest.TestCase):
self.assertTrue("cpu" in aidge_core.Tensor.get_available_backends())
def test_numpy_int_to_tensor(self):
np_array = np.arange(9).reshape(1,1,3,3)
np_array = np.arange(9).reshape(1,1,3,3).astype(np.int32)
# Numpy -> Tensor
t = aidge_core.Tensor(np_array)
self.assertEqual(t.dtype(), aidge_core.DataType.Int32)
......@@ -35,6 +35,16 @@ class test_tensor(unittest.TestCase):
for i,j in zip(t.dims(), nnarray.shape):
self.assertEqual(i,j)
def test_numpy_int64_to_tensor(self):
    """An int64 numpy array converts to an aidge Tensor with dtype Int64
    and identical values and dimensions."""
    np_array = np.arange(9, dtype=np.int64).reshape(1, 1, 3, 3)
    # Numpy -> Tensor
    tensor = aidge_core.Tensor(np_array)
    self.assertEqual(tensor.dtype(), aidge_core.DataType.Int64)
    # Values must survive the conversion unchanged...
    for tensor_value, numpy_value in zip(tensor, np_array.flatten()):
        self.assertTrue(tensor_value == numpy_value)
    # ...and so must the shape.
    for tensor_dim, numpy_dim in zip(tensor.dims(), np_array.shape):
        self.assertEqual(tensor_dim, numpy_dim)
def test_numpy_float_to_tensor(self):
t = aidge_core.Tensor()
np_array = np.random.rand(1, 1, 3, 3).astype(np.float32)
......@@ -49,7 +59,7 @@ class test_tensor(unittest.TestCase):
def test_get_set(self):
dims = [2,2,2]
np_array = np.arange(8).reshape(dims)
np_array = np.arange(8).reshape(dims).astype(np.int32)
# Numpy -> Tensor
t = aidge_core.Tensor(np_array)
for i in range(8):
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_
#define AIDGE_CPU_DATA_TENSORIMPL_H_
#include "aidge/backend/TensorImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/future_std/span.hpp"
namespace Aidge {
template <class T>
class TensorImpl_cpu : public TensorImpl {
private:
private:
const Tensor &mTensor; // Impl needs to access Tensor information, but is not
// supposed to change it!
std::vector<T> mData;
/// Pointer to the data and its capacity
future_std::span<T> mData;
/// If this instance own the data, std::unique_ptr manages it
std::unique_ptr<T[]> mDataOwner;
public:
public:
static constexpr const char *Backend = "cpu";
TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
bool operator==(const TensorImpl &otherImpl) const override final {
const auto& typedOtherImpl = reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl);
AIDGE_INTERNAL_ASSERT(typedOtherImpl.size() >= mTensor.size());
std::size_t i = 0;
for (; i < mTensor.size() &&
mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i];
++i) {
*(mData.data()+i) == *static_cast<const T*>(typedOtherImpl.rawPtr(i));
++i) {
}
return i == mTensor.size();
}
......@@ -32,37 +53,130 @@ class TensorImpl_cpu : public TensorImpl {
return std::make_unique<TensorImpl_cpu<T>>(tensor);
}
// native interface
const std::vector<T> &data() const { return mData; }
inline std::size_t size() const noexcept override final { return mData.size(); }
inline std::size_t scalarSize() const noexcept override final { return sizeof(T); }
std::size_t scalarSize() const override { return sizeof(T); }
/// Device selection is a no-op for this backend: the CPU exposes a single
/// device, so any index other than 0 is a caller error.
void setDevice(DeviceIdx_t device) override final {
AIDGE_ASSERT(device == 0, "device cannot be != 0 for CPU backend");
}
void copy(const void *src, NbElts_t length) override {
void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
static_cast<T *>(rawPtr()));
static_cast<T *>(rawPtr()) + offset);
}
void copyCast(const void *src, NbElts_t length, const DataType srcDt) override final {
if (length == 0) {
return;
}
AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
switch (srcDt)
{
case DataType::Float64:
std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::Float32:
std::copy(static_cast<const float*>(src), static_cast<const float*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::Float16:
std::copy(static_cast<const half_float::half*>(src), static_cast<const half_float::half*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::Int64:
std::copy(static_cast<const int64_t*>(src), static_cast<const int64_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::UInt64:
std::copy(static_cast<const uint64_t*>(src), static_cast<const uint64_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::Int32:
std::copy(static_cast<const int32_t*>(src), static_cast<const int32_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::UInt32:
std::copy(static_cast<const uint32_t*>(src), static_cast<const uint32_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::Int16:
std::copy(static_cast<const int16_t*>(src), static_cast<const int16_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::UInt16:
std::copy(static_cast<const uint16_t*>(src), static_cast<const uint16_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::Int8:
std::copy(static_cast<const int8_t*>(src), static_cast<const int8_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
case DataType::UInt8:
std::copy(static_cast<const uint8_t*>(src), static_cast<const uint8_t*>(src) + length,
static_cast<T *>(rawPtr()));
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "Unsupported data type.");
break;
}
}
void *rawPtr() override {
lazyInit(mData);
return mData.data();
/// Copy from another "device": for the CPU backend only {"cpu", 0} is legal,
/// after which this degenerates to a plain host-to-host copy().
void copyFromDevice(const void *src, NbElts_t length, const std::pair<std::string, DeviceIdx_t>& device) override final {
AIDGE_ASSERT(device.first == Backend, "backend must match");
AIDGE_ASSERT(device.second == 0, "device cannot be != 0 for CPU backend");
copy(src, length);
}
/// On CPU, host memory is the backend's native memory: forward to copy().
inline void copyFromHost(const void *src, NbElts_t length) override final {
copy(src, length);
}
/// Copy `length` elements from this impl's buffer into a host buffer `dst`.
/// Const overload of rawPtr() is used, so the buffer must already be
/// initialized (its assert fires otherwise).
void copyToHost(void *dst, NbElts_t length) const override final {
    AIDGE_ASSERT(length <= mData.size() || length <= mTensor.size(), "copy length is above capacity");
    const T *src = static_cast<const T *>(rawPtr());
    // `src` is already typed: the original's extra const T* casts were redundant.
    std::copy(src, src + length, static_cast<T *>(dst));
}
/// Mutable raw accessor: allocates the backing buffer on first use (lazy),
/// then returns a pointer offset by `offset` elements.
void *rawPtr(NbElts_t offset = 0) override final {
lazyInit();
return (mData.data() + offset);
};
/// Convenience accessor: typed pointer arithmetic on the lazily-initialized
/// buffer, returned as an untyped pointer to element `idx`.
void* getRaw(std::size_t idx){
return static_cast<void*>(static_cast<T *>(rawPtr()) + idx);
};
/// Const raw accessor: unlike the mutable overload it cannot allocate, so the
/// buffer must already cover the tensor's full size.
const void *rawPtr(NbElts_t offset = 0) const override final {
AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const rawPtr");
return (mData.data() + offset);
};
virtual ~TensorImpl_cpu() = default;
/// For the CPU backend host memory *is* backend memory, so hostPtr behaves
/// exactly like the mutable rawPtr (lazy allocation included).
void *hostPtr(NbElts_t offset = 0) override final {
lazyInit();
return (mData.data() + offset);
};
void setRawPtr(void *ptr) override final {
T *newPtr = static_cast<T *>(ptr);
mData = std::vector<T>(newPtr, newPtr + mTensor.size());
/// Const host accessor; like the const rawPtr, it requires an already
/// initialized buffer large enough for the whole tensor.
const void *hostPtr(NbElts_t offset = 0) const override final {
AIDGE_ASSERT(mData.size() >= mTensor.size(), "accessing uninitialized const hostPtr");
return (mData.data() + offset);
};
private:
void lazyInit(std::vector<T> &data) {
assert(mTensor.dataType() == NativeType<T>::type);
/// Adopt an externally-owned buffer of `length` elements: the span now
/// aliases `ptr` and any previously owned storage is released.
void setRawPtr(void *ptr, NbElts_t length) override final {
AIDGE_ASSERT(length >= mTensor.size(), "trying to set raw pointer of insufficient capacity");
mData = future_std::span<T>(static_cast<T *>(ptr), length);
mDataOwner.reset();
};
if (data.size() != mTensor.size()) data.resize(mTensor.size());
virtual ~TensorImpl_cpu() = default;
private:
/// Ensure the backing buffer can hold the full tensor, allocating (and taking
/// ownership of) new storage when the current capacity is insufficient.
void lazyInit() {
    const auto required = mTensor.size();
    if (mData.size() >= required) {
        return;  // Capacity already sufficient: nothing to do.
    }
    // A re-allocation will occur; only legal when we own the storage (or have none yet).
    AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "trying to enlarge non-owned data");
    mDataOwner.reset(new T[required]);
    mData = future_std::span<T>(mDataOwner.get(), required);
}
};
......@@ -71,8 +185,12 @@ static Registrar<Tensor> registrarTensorImpl_cpu_Float64(
{"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float32(
{"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float16(
{"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
{"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int64(
{"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<long>::create);
} // namespace
} // namespace Aidge
......
......@@ -39,19 +39,7 @@ public:
return std::make_unique<AddImpl_cpu>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override;
};
......
......@@ -27,11 +27,12 @@ void AddImpl_cpu_forward_kernel(const std::size_t inputLength, const std::vector
}
O* output = static_cast<O*>(output_);
for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] += inputs[iIndex][oIndex];
}
}
for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
output[oIndex] = 0;
for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
output[oIndex] += inputs[iIndex][oIndex];
}
}
}
namespace {
......
......@@ -61,6 +61,7 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
......
......@@ -14,6 +14,7 @@
#include "aidge/utils/Registrar.hpp"
#include "aidge/data/half.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
......@@ -151,6 +152,9 @@ namespace {
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float16(
{DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>);
......
......@@ -12,72 +12,69 @@
#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/operator/Slice.hpp"
#include "aidge/backend/cpu/operator/SliceImpl.hpp"
#include <vector>
#include <cstddef>
#include <vector>
#include "aidge/backend/cpu/operator/SliceImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/Slice.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
template <class I>
void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs,
const std::vector<std::size_t> inputDims,
const void* input_,
void* output_) {
const std::vector<std::size_t> inputDims,
const void* input_,
void* output_) {
std::vector<std::size_t> slicedDims = inputDims;
std::size_t beginning = 0;
DimSize_t nbAxes = std::get<2>(attrs).size();
for(std::size_t i=0; i<nbAxes;++i)
{
for (std::size_t i = 0; i < nbAxes; ++i) {
// For each slice operation get the params and cast them to size_t
std::int64_t axis_ = std::get<2>(attrs)[i];
std::int64_t start_ = std::get<0>(attrs)[i];
std::int64_t end_ = std::get<1>(attrs)[i];
std::size_t axis = axis_>=0?axis_:axis_+inputDims.size();
std::size_t start = start_>=0?start_:start_+inputDims[axis];
std::size_t end = end_>=0?end_:end_+inputDims[axis];
std::size_t stride=1;
for(std::size_t j = inputDims.size()-1; j>axis; --j)
stride*=inputDims[j];
const std::int64_t axis_ = std::get<2>(attrs)[i];
const std::int64_t start_ = std::get<0>(attrs)[i];
const std::int64_t end_ = std::get<1>(attrs)[i];
const std::size_t axis = axis_ >= 0 ? axis_ : static_cast<std::size_t>(axis_) + inputDims.size();
const std::size_t start = start_ >= 0 ? start_ : start_ + inputDims[axis];
const std::size_t end = end_ >= 0 ? end_ : end_ + inputDims[axis];
std::size_t stride = 1;
for (std::size_t j = inputDims.size() - 1; j > axis; --j) stride *= inputDims[j];
beginning += start * stride;
std::size_t sliceLength = end - start + 1;
const std::size_t sliceLength = end - start + 1;
slicedDims[axis] = sliceLength;
}
const I* input = static_cast<const I*>(input_) + beginning;
I* output = static_cast<I*>(output_);
const std::size_t nbDims = slicedDims.size();
// for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractDims = {1,5,5,3}
// for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractDims = {1,5,5,3}
std::vector<std::size_t> substractedDims = std::vector<std::size_t>(nbDims);
for (std::size_t i = 0; i < nbDims; ++i) {
substractedDims[i] = inputDims[i] - slicedDims[i];
}
// for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1}
// for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1}
std::vector<std::size_t> prodSlicedDims = std::vector<std::size_t>(nbDims);
std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims+1);
prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
prodInputDims[nbDims] = 1;
for (std::size_t i = 2; i <= nbDims; ++i) {
prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1]*slicedDims[nbDims - i];
prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1]*inputDims[nbDims - i];
}
std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims + 1);
prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
prodInputDims[nbDims] = 1;
for (std::size_t i = 2; i <= nbDims; ++i) {
prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1] * slicedDims[nbDims - i];
prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1] * inputDims[nbDims - i];
}
std::size_t j = 0;
std::size_t i = 0;
for (; j < prodSlicedDims[0];) {
output[j] = input[i++];
std::size_t j = 0;
std::size_t i = 0;
for (; j < prodSlicedDims[0];) {
output[j] = input[i++];
++j;
for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx]*prodInputDims[idx+1] : 0;
}
}
for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx] * prodInputDims[idx + 1] : 0;
}
}
}
namespace {
......
......@@ -21,46 +21,11 @@
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp"
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
assert(mOp.getRawInput(inputIdx) && "requires valid input");
// Requires the whole tensors
const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims();
return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
// for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
// this implementation can be in-place
return 0;
}
Aidge::NbElts_t Aidge::AddImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
(void) outputIdx;
const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const {
assert(inputIdx < mNbConsumedData.size());
return mNbConsumedData[inputIdx];
}
Aidge::NbElts_t Aidge::AddImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const {
assert(outputIdx < mNbProducedData.size());
return mNbProducedData[outputIdx];
}
void Aidge::AddImpl_cpu::updateConsummerProducer() {
for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
mNbProducedData[0]+= getRequiredMemory(0, {});
}
void Aidge::AddImpl_cpu::forward() {
assert(mOp.getRawInput(0) && "missing input in Add operator");
DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType();
......@@ -69,16 +34,36 @@ void Aidge::AddImpl_cpu::forward() {
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput);
}
auto kernelFunc = Registrar<AddImplForward_cpu>::create({
// Find the correct kernel type
const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType();
const Registrar<AddImplForward_cpu>::registrar_key registrarKey = {
datatypeFirstInput,
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
outputDataType};
Registrar<AddImplForward_cpu>::registrar_type kernelFunc;
if (Registrar<AddImplForward_cpu>::exists(registrarKey)) {
// One exists with the right inputs/output types
kernelFunc = Registrar<AddImplForward_cpu>::create(registrarKey);
}
else {
// Otherwise, fallback to the kernel with all types matching output type
kernelFunc = Registrar<AddImplForward_cpu>::create({
outputDataType, outputDataType});
}
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
std::vector<const void*> opInputs;
std::vector<std::shared_ptr<Tensor>> inputsFallback(mOp.nbInputs());
for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) {
opInputs.push_back(getCPUPtr(mOp.getRawInput(i)));
const auto& input = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->refCastFrom(inputsFallback[i], *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
opInputs.push_back(input.getImpl()->rawPtr());
}
// Call kernel
kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
opInputs,
getCPUPtr(mOp.getRawOutput(0)));
}
\ No newline at end of file
}
......@@ -34,14 +34,35 @@ void Aidge::ConvImpl2D_cpu::forward() {
assert(mOp.getRawInput(2) && "missing input #2");
// Find the correct kernel type
auto kernelFunc =
Registrar<ConvImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType();
const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = {
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
outputDataType};
Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc;
if (Registrar<ConvImpl2DForward_cpu>::exists(registrarKey)) {
// One exists with the right inputs/output types
kernelFunc = Registrar<ConvImpl2DForward_cpu>::create(registrarKey);
}
else {
// Otherwise, fallback to the kernel with all types matching output type
kernelFunc = Registrar<ConvImpl2DForward_cpu>::create({
outputDataType, outputDataType, outputDataType, outputDataType});
}
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
// Call kernel
kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawInput(2)), getCPUPtr(mOp.getRawOutput(0)));
input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
getCPUPtr(mOp.getRawOutput(0)));
}
......@@ -29,29 +29,37 @@ void Aidge::FCImpl_cpu::forward()
assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && "missing input #2");
// Find the correct kernel type
auto kernelFunc = Registrar<FCImplForward_cpu>::create(
{std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType();
const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
outputDataType};
Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
if (Registrar<FCImplForward_cpu>::exists(registrarKey)) {
// One exists with the right inputs/output types
kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey);
}
else {
// Otherwise, fallback to the kernel with all types matching output type
kernelFunc = Registrar<FCImplForward_cpu>::create({
outputDataType, outputDataType, outputDataType, outputDataType});
}
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
// Call kernel
// if (std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->nbDims() == 4) {
// kernelFunc(
// mOp.getStaticAttributes(),
// std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
// getCPUPtr(mOp.getRawInput(0),
// mOp.mInputs[1]->getImpl()->rawPtr(),
// mOp.mInputs[2]->getImpl()->rawPtr(),
// mOp.getOutput(0)->getImpl()->rawPtr());
// }
// else
kernelFunc(
dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawInput(2)),
kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
input0.dims()[0],
input0.size() / input0.dims()[0],
input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
getCPUPtr(mOp.getRawOutput(0)));
}
......@@ -47,7 +47,7 @@ void Aidge::MatMulImpl_cpu::forward()
kernelFunc(
dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size() / std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawInput(1)),
getCPUPtr(mOp.getRawOutput(0)));
......
......@@ -38,6 +38,7 @@ void Aidge::SoftmaxImpl_cpu::forward() {
Softmax_Op::Attrs attr = dynamic_cast<const Softmax_Op&>(mOp).getStaticAttributes();
const int& axisIdx = static_cast<const int&>(std::get<0>(attr));
// Call kernel
kernelFunc(axisIdx,
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
......
......@@ -45,7 +45,7 @@ TEST_CASE("Tensor creation") {
REQUIRE(x.get<int>({0, 0, 1}) == 2);
REQUIRE(x.get<int>({0, 1, 1}) == 4);
REQUIRE(x.get<int>({1, 1, 0}) == 7);
x.get<int>({1, 1, 1}) = 36;
x.set<int>({1, 1, 1}, 36);
REQUIRE(x.get<int>({1, 1, 1}) == 36);
}
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cmath>
#include <cstdlib>
#include <memory>
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/MetaOperator.hpp"
#include "aidge/operator/MetaOperatorDefs.hpp"
#include "aidge/operator/Pad.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] MetaOperator/PaddedConv(forward)", "[MetaOperator][PaddedConv][CPU]") {
    // Checks a zero-padded convolution computed as an explicit Pad -> Conv chain
    // against precomputed double-precision reference outputs.
    //
    // Convolution weights: 4 output channels, 3 input channels, 3x3 kernels.
    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(
            Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02},
                                           {1.16492919e-01, 8.21634093e-02, 1.17413265e-01},
                                           {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}},
                                          {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01},
                                           {6.12586558e-01, 8.09918671e-02, 8.40649383e-01},
                                           {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}},
                                          {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01},
                                           {1.36960611e-01, 4.64806728e-01, 4.85150000e-01},
                                           {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}},
                                         {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01},
                                           {1.56806559e-01, 6.22280998e-01, 3.15827594e-01},
                                           {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}},
                                          {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01},
                                           {1.67925807e-01, 2.68356150e-01, 6.28875602e-01},
                                           {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}},
                                          {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01},
                                           {7.63047502e-01, 5.12539506e-02, 9.77400493e-01},
                                           {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}},
                                         {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01},
                                           {7.10897067e-02, 5.02579011e-01, 3.35236224e-01},
                                           {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}},
                                          {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01},
                                           {1.59875539e-01, 9.13163381e-01, 3.59806060e-01},
                                           {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}},
                                          {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01},
                                           {5.46298282e-01, 2.89698587e-01, 2.62612651e-01},
                                           {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}},
                                         {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01},
                                           {8.67878485e-01, 2.93263422e-01, 8.03912714e-01},
                                           {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}},
                                          {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01},
                                           {5.80538620e-01, 6.63031275e-01, 2.07247191e-01},
                                           {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}},
                                          {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01},
                                           {7.34639028e-01, 2.84957200e-02, 9.70225217e-01},
                                           {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}});
    // One bias value per output channel.
    std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(
            Array1D<double, 4>{{0.16884905, 0.27994487, 0.57227465, 0.06435205}});
    // Input: batch of 2, 3 channels, 5x5 spatial, NCHW layout.
    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{
            // NCHW
            {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127},
               {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607},
               {0.85065842, 0.88226201, 0.54971951, 0.23360494, 0.53907884},
               {0.33423098, 0.79564312, 0.80419414, 0.76839638, 0.87248221},
               {0.77328729, 0.65749407, 0.47277589, 0.32889198, 0.93970518}},
              {{0.66669145, 0.64193351, 0.45315988, 0.32794057, 0.38461822},
               {0.72295814, 0.18395073, 0.85909664, 0.30010301, 0.56065865},
               {0.34777938, 0.77869746, 0.33159421, 0.19540932, 0.77767906},
               {0.5778391, 0.08218411, 0.27758371, 0.99017749, 0.61827997},
               {0.10440745, 0.3197831, 0.89157608, 0.12216887, 0.950232}},
              {{0.68073443, 0.2681118, 0.51848834, 0.62864493, 0.36717478},
               {0.64106244, 0.43779425, 0.02771029, 0.78275231, 0.45693104},
               {0.6487417, 0.01603838, 0.73869997, 0.96494221, 0.39588782},
               {0.5975827, 0.90913292, 0.55036969, 0.4747373, 0.62460509},
               {0.79675124, 0.02807549, 0.53227602, 0.88805927, 0.96646591}}},
             {{{0.81851935, 0.21267665, 0.01580692, 0.54907998, 0.89010049},
               {0.80165784, 0.55195592, 0.20740314, 0.22782844, 0.89205031},
               {0.94217108, 0.58434542, 0.20738313, 0.79065873, 0.9371597},
               {0.02254708, 0.95539178, 0.95165758, 0.53736666, 0.49100362},
               {0.08018625, 0.69108027, 0.00329741, 0.74565761, 0.30899213}},
              {{0.34868638, 0.12792604, 0.37382248, 0.0374756, 0.50653087},
               {0.59614405, 0.64820746, 0.31470307, 0.62460364, 0.29253268},
               {0.92864889, 0.51014224, 0.08921206, 0.11094072, 0.64691121},
               {0.50586371, 0.6686477, 0.72511169, 0.41681783, 0.6325049},
               {0.71594137, 0.73382767, 0.36589439, 0.03255165, 0.75006865}},
              {{0.6294127, 0.85548534, 0.0902963, 0.28915773, 0.36564289},
               {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958},
               {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177},
               {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434},
               {0.71426058, 0.85032931, 0.90750818, 0.28768431, 0.4401146}}}}});
    // Precomputed reference output: with 1-pixel zero padding, the 3x3 conv
    // preserves the 5x5 spatial size.
    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
            Array4D<double, 2, 4, 5, 5>{{{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273},
                                           {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567},
                                           {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523},
                                           {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136},
                                           {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}},
                                          {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890},
                                           {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475},
                                           {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442},
                                           {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438},
                                           {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}},
                                          {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092},
                                           {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575},
                                           {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146},
                                           {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581},
                                           {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}},
                                          {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740},
                                           {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107},
                                           {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523},
                                           {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123},
                                           {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}},
                                         {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229},
                                           {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444},
                                           {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241},
                                           {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706},
                                           {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}},
                                          {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648},
                                           {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705},
                                           {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404},
                                           {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069},
                                           {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}},
                                          {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888},
                                           {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179},
                                           {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316},
                                           {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387},
                                           {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}},
                                          {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038},
                                           {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408},
                                           {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357},
                                           {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303},
                                           {3.16612267, 4.38248920, 5.23248482, 4.21292210, 2.86031270}}}}});
    // Build the Pad -> Conv pipeline by hand: 1-pixel constant (zero) padding
    // on each border, then a 3-channel-in / 4-channel-out 3x3 convolution.
    std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv");
    auto convOp = std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
    std::shared_ptr<Node> myPad =
            Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0);
    auto padOp = std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
    convOp->setInput(1, myWeights);
    convOp->setInput(2, myBias);
    // Wire Pad output 0 into Conv input 0, feed the raw input into Pad.
    myPad->addChild(myConv, 0, 0);
    padOp->setInput(0, myInput);
    padOp->setDataType(DataType::Float64);
    padOp->setBackend("cpu");
    padOp->computeOutputDims();
    convOp->setDataType(DataType::Float64);
    convOp->setBackend("cpu");
    convOp->computeOutputDims();
    // Run the two operators in sequence (no scheduler involved).
    myPad->forward();
    myConv->forward();
    convOp -> getOutput(0) -> print();  // debug dump of the computed output
    // Element-wise comparison against the reference, 1e-5 absolute tolerance.
    double* computedOutput = static_cast<double*>(convOp->getOutput(0)->getImpl()->rawPtr());
    double* expectedOutput = static_cast<double*>(myOutput->getImpl()->rawPtr());
    for (std::size_t i = 0; i < myOutput->size(); ++i) {
        REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5);
    }
    // NOTE(review): this PaddedConv meta-operator node is constructed but never
    // connected, run, or checked — presumably it was meant to be compared
    // against the manual Pad+Conv result above. Dead code as written; confirm
    // intent before removing or completing the comparison.
    std::shared_ptr<Node> myPaddedConv =
            PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1});
}
\ No newline at end of file
......@@ -16,76 +16,151 @@
#include "aidge/backend/cpu.hpp"
#include <memory>
#include <iostream>
using namespace Aidge;
TEST_CASE("[cpu/operator] Slice(forward)") {
TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
SECTION("1D Tensor") {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
{0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,4> {
{0, 1, 2,-3}
});
std::shared_ptr<Node> mySlice = Slice({0}, {3}, {0});
auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
mySlice->getOperator()->associateInput(0,input0);
mySlice->getOperator()->setDataType(DataType::Int32);
mySlice->getOperator()->setBackend("cpu");
op->computeOutputDims();
mySlice->forward();
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
}
SECTION("2D Tensor") {
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<int,2,4> {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> {
{
{1, 2, 3, 4},
{5, 6, 7, 8}
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,1,3> {
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> {
{
{6, 7, 8}
{-5,-6, 7},
{-5,-6, 7}
}
});
std::shared_ptr<Node> mySlice = Slice({1, 1}, {1, 3}, {0, 1});
std::shared_ptr<Node> mySlice = Slice({0,5}, {1,7}, {0,1});
auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
op->associateInput(0, input);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
mySlice->getOperator()->associateInput(0,input0);
mySlice->getOperator()->setDataType(DataType::Int32);
mySlice->getOperator()->setBackend("cpu");
op->computeOutputDims();
mySlice->forward();
// mySlice->getOperator()->output(0).print();
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
}
SECTION("3D Tensor") {
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array3D<int,2,4,3> {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> {
{
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
},
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> {
{
{
{ 4,-5,-6}
}
}
});
std::shared_ptr<Node> mySlice = Slice({0,1,4}, {0,1,6}, {0,1,2});
auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
mySlice->getOperator()->associateInput(0,input0);
mySlice->getOperator()->setDataType(DataType::Int32);
mySlice->getOperator()->setBackend("cpu");
op->computeOutputDims();
mySlice->forward();
// mySlice->getOperator()->output(0).print();
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
}
SECTION("4D Tensor") {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
{
{
{1, 2, 3},
{4, 5, 6},
{7, 8, 9},
{10, 11, 12}
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
},
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
}
},
{
{13, 14, 15},
{16, 17, 18},
{18, 20, 21},
{22, 23, 24}
{
{ 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
},
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3,11,-5,-6, 7,-1,10}
}
}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,3,1> {
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
{
{
{3},
{6},
{9}
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
},
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
}
},
{
{15},
{18},
{21}
{
{ 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
{-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
},
{
{ 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
{-5, 4, 2,-3,11,-5,-6, 7,-1,10}
}
}
}
});
std::shared_ptr<Node> mySlice = Slice({0, 2}, {2, 2}, {1, 2});
std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,9}, {0,1,2,3});
auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
op->associateInput(0, input);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
mySlice->getOperator()->associateInput(0,input0);
mySlice->getOperator()->setDataType(DataType::Int32);
mySlice->getOperator()->setBackend("cpu");
op->computeOutputDims();
mySlice->forward();
// mySlice->getOperator()->output(0).print();
REQUIRE(*(op->getOutput(0)) == *expectedOutput);
REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
}
}
\ No newline at end of file
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipies/Recipies.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
#include <cstddef>
using namespace Aidge;
TEST_CASE("[ExplicitCastMove] conv") {
    // Three-convolution chain fed by a static producer.
    auto firstConv  = Conv(3, 32, {3, 3}, "conv1");
    auto middleConv = Conv(32, 64, {3, 3}, "conv2");
    auto lastConv   = Conv(64, 10, {1, 1}, "conv3", {2, 2});

    auto graph = Sequential({
        Producer({16, 3, 224, 224}, "dataProvider"),
        firstConv,
        middleConv,
        lastConv
    });

    graph->setBackend("cpu");
    // Assign heterogeneous data types so the recipe has casts to materialize.
    firstConv->getOperator()->setDataType(DataType::Int32);
    lastConv->getOperator()->setDataType(DataType::Float64);

    graph->save("explicitCastMove_before");
    // 4 user nodes + their producers before the transformation.
    REQUIRE(graph->getNodes().size() == 10);

    graph->forwardDims();
    explicitCastMove(graph);

    graph->save("explicitCastMove_after");
    // The recipe must have inserted 3 explicit Cast/Move nodes.
    REQUIRE(graph->getNodes().size() == 13);
}
......@@ -183,26 +183,4 @@ TEST_CASE("[core/recipies] Tiling(transformation)", "[Tiling][Recipies]") {
}
}
}
}
// std::shared_ptr<GraphView> g = Sequential({
// Conv(3, 16, {3,3}, "conv1"),
// ReLU("relu1"),
// Conv(16, 32, {1,1}, "conv2"),
// Conv(32, 16, {1,1}, "conv3"),
// Conv(16, 10, {3,3}, "conv4"),
// ReLU("relu2")
// });
// for (auto& individualConv : g->match("Conv")) {
// auto tiledConv = horizontalTiling(individualConv);
// g->replace(individualConv, tiledConv);
// }
// }
// SECTION("Create the GraphView with tiled layers") {
// std::shared_ptr<GraphView> g;
// g->addChild(horizontalTiling(Conv()))
// }
// }
// } // namespace Aidge
\ No newline at end of file
} // namespace Aidge
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <memory>
#include <string>
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/graph/GraphView.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/scheduler/Scheduler.hpp"
#include "aidge/recipies/Recipies.hpp"
#include "aidge/backend/cpu.hpp"
using namespace Aidge;
TEST_CASE("[cpu/castmove] CastMove(forward)") {
    // Input and conv1 parameters shared by the sections below. Catch2
    // re-executes this prologue for each SECTION, so every section starts
    // from fresh tensors.
    std::shared_ptr<Tensor> inputTensor =
            std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
                                                                 {5, 6, 7, 8, 9},
                                                                 {10, 11, 12, 13, 14},
                                                                 {15, 16, 17, 18, 19},
                                                                 {20, 21, 22, 23, 24}}},
                                                               {{{25, 26, 27, 28, 29},
                                                                 {30, 31, 32, 33, 34},
                                                                 {35, 36, 37, 38, 39},
                                                                 {40, 41, 42, 43, 44},
                                                                 {45, 46, 47, 48, 49}}}}});
    std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>(
            Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}},
                                      {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}},
                                      {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}});
    std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});

    // Builds the Conv->Conv->Conv->FC graph, attaches all weights/biases and
    // assigns heterogeneous data types (Int32 graph, Float32 conv1, Float64
    // conv3). Factored out because the "implicit" and "explicit" sections
    // previously duplicated this setup verbatim.
    const auto buildGraph = [&]() -> std::shared_ptr<GraphView> {
        std::shared_ptr<GraphView> g =
                Sequential({
                    Conv(1, 3, {3, 3}, "conv1"),
                    Conv(3, 4, {1, 1}, "conv2"),
                    Conv(4, 3, {1, 1}, "conv3"),
                    FC(27, 5, false, "fc")});
        g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
        g->getNode("conv1")->getOperator()->setInput(1, weight1);
        g->getNode("conv1")->getOperator()->setInput(2, bias1);
        std::shared_ptr<Tensor> weight2 =
                std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
                                                                   {{{4}}, {{5}}, {{6}}},
                                                                   {{{7}}, {{8}}, {{9}}},
                                                                   {{{10}}, {{11}}, {{12}}}}});
        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
        g->getNode("conv2")->getOperator()->setInput(1, weight2);
        g->getNode("conv2")->getOperator()->setInput(2, bias2);
        std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
                Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
                                          {{{5}}, {{6}}, {{7}}, {{8}}},
                                          {{{9}}, {{10}}, {{11}}, {{12}}}}});
        std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
        g->getNode("conv3")->getOperator()->setInput(1, weight3);
        g->getNode("conv3")->getOperator()->setInput(2, bias3);
        std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
                Array2D<int, 5, 27>{{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                                      15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
                                     {13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                      12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
                                     {10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
                                      9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
                                     {7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5,
                                      6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3},
                                     {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
                                      3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
        std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
        g->getNode("fc")->getOperator()->setInput(1, weightfc);
        g->getNode("fc")->getOperator()->setInput(2, biasfc);
        g->setDataType(Aidge::DataType::Int32);
        g->getNode("conv1")->getOperator()->setDataType(DataType::Float32);
        g->getNode("conv3")->getOperator()->setDataType(DataType::Float64);
        return g;
    };

    // Schedules the graph on the CPU backend and checks every layer's output
    // against the precomputed references (conv1 compared as Float32, conv3 as
    // Float64, conv2/fc as Int32).
    const auto runAndCheck = [](std::shared_ptr<GraphView> g) {
        g->setBackend("cpu");
        g->forwardDims();
        SequentialScheduler scheduler(g);
        REQUIRE_NOTHROW(scheduler.forward());
        scheduler.saveSchedulingDiagram("schedulingSequential");
        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
                {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
                  {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
                  {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
                 {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
                  {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
                  {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
                {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
                  {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
                  {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
                  {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
                 {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
                  {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
                  {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
                  {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
                {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
                  {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
                  {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
                 {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
                  {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
                  {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
        Tensor expectedOutput4 = Array2D<int, 2, 5>{
                {{205050376, 198925904, 181355097, 196978090, 238868348},
                 {598467376, 561797804, 560823897, 593043790, 698672948}}};
        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
        REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12));
        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
        REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12));
        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
        REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
        REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12));
    };

    SECTION("Test implicit") {
        // Casts/moves are inserted implicitly at execution time.
        runAndCheck(buildGraph());
    }
    SECTION("Half") {
        // Float32 -> Float16 copy-cast stays within 1e-3 relative error.
        Tensor refTensor = Array2D<float, 3, 2>{{{0.0, 1.0},{2.1, 3.4},{5000.0, 1.0e5}}};
        Tensor tensor(DataType::Float16);
        tensor.copyCastFrom(refTensor);
        REQUIRE(approxEq<float, half_float::half>(refTensor, tensor, 1.0e-3, 0.0));
    }
    SECTION("Test explicit") {
        // Cast/Move nodes are materialized in the graph before scheduling.
        std::shared_ptr<GraphView> g = buildGraph();
        explicitCastMove(g);
        runAndCheck(g);
    }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment