Commit 4352dddc authored by Maxence Naud

Merge branch 'master' into tiling

parents c215a4ca c20897c1
2 merge requests: !22 Update operators implementation, !16 Draft: Tiling
Pipeline #32955 failed
Showing 368 additions and 57 deletions
......@@ -23,6 +23,98 @@ build:ubuntu_cpp:
- build_cpp/
- install_cpp/
build:ubuntu_cpp_g++10:
stage: build
needs: []
tags:
- docker
script:
# Download dependencies
# aidge_core
- 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
- unzip -o build_artifacts.zip -d .
- rm -rf build_cpp
# Build current module
- export CMAKE_PREFIX_PATH=../install_cpp
- apt install -y g++-10
- mkdir -p build_cpp
- mkdir -p install_cpp
- cd build_cpp
- export CXX=/usr/bin/g++-10
- cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
- make -j4 all install
build:ubuntu_cpp_g++12:
stage: build
needs: []
tags:
- docker
script:
# Download dependencies
# aidge_core
- 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
- unzip -o build_artifacts.zip -d .
- rm -rf build_cpp
# Build current module
- export CMAKE_PREFIX_PATH=../install_cpp
- apt install -y g++-12
- mkdir -p build_cpp
- mkdir -p install_cpp
- cd build_cpp
- export CXX=/usr/bin/g++-12
- cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
- make -j4 all install
build:ubuntu_cpp_clang12:
stage: build
needs: []
tags:
- docker
script:
# Download dependencies
# aidge_core
- 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
- unzip -o build_artifacts.zip -d .
- rm -rf build_cpp
# Build current module
- export CMAKE_PREFIX_PATH=../install_cpp
- apt install -y clang-12
- mkdir -p build_cpp
- mkdir -p install_cpp
- cd build_cpp
- export CXX=/usr/bin/clang++-12
- cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
- make -j4 all install
build:ubuntu_cpp_clang15:
stage: build
needs: []
tags:
- docker
script:
# Download dependencies
# aidge_core
- 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
- unzip -o build_artifacts.zip -d .
- rm -rf build_cpp
# Build current module
- export CMAKE_PREFIX_PATH=../install_cpp
- apt install -y clang-15
- mkdir -p build_cpp
- mkdir -p install_cpp
- cd build_cpp
- export CXX=/usr/bin/clang++-15
- cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
- make -j4 all install
build:ubuntu_python:
stage: build
needs: []
......@@ -84,3 +176,42 @@ build:windows_cpp:
paths:
- build_cpp/
- install_cpp/
build:windows_python:
stage: build
needs: []
tags:
- windows
image: buildtools
before_script:
# Install Chocolatey
- Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
# Install dependencies
- choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
- choco install git -Y
- choco install python -Y
# Update PATH
- $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
script:
# Download dependencies
# aidge_core (CPP)
- 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip'
- Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
- Remove-Item .\build_cpp\ -Recurse
# aidge_core (Python)
- 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip'
- Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
- python -m pip install virtualenv
- virtualenv venv
- venv\Scripts\Activate.ps1
# Numpy dependency for unit tests
- python -m pip install numpy
- $env:AIDGE_INSTALL = "$pwd" + "install"
- $env:CMAKE_PREFIX_PATH = "../install_cpp"
- python -m pip install .
artifacts:
expire_in: 1 week
paths:
- venv/
......@@ -13,7 +13,6 @@
#define AIDGE_CPU_IMPORTS_H_
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
......@@ -23,10 +22,10 @@
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/backend/cpu/operator/ProducerImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
#include "aidge/backend/cpu/operator/SliceImpl.hpp"
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
#endif /* AIDGE_CPU_IMPORTS_H_ */
\ No newline at end of file
......@@ -74,12 +74,12 @@ class AddImpl_cpu : public OperatorImpl {
return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final {
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final {
// for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
return 0;
}
NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final {
NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final {
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
(void) outputIdx;
......@@ -99,11 +99,11 @@ class AddImpl_cpu : public OperatorImpl {
}
void updateConsummerProducer() override final;
void forward() {
void forward() override {
// nothing
}
void backward() { printf("Not implemented yet.\n"); }
void backward() override { printf("Not implemented yet.\n"); }
};
template <>
......@@ -133,9 +133,9 @@ class AddImpl_cpu<1> : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
template <>
......@@ -165,9 +165,9 @@ class AddImpl_cpu<2> : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
template <>
......@@ -196,9 +196,9 @@ class AddImpl_cpu<3> : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
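
Most of the header changes in this diff replace void forward(); / void backward(); with void forward() override; / void backward() override;. A minimal sketch, not Aidge code, of what the specifier buys in the -DWERROR=ON CI builds added above: with override, a declaration that fails to match a base-class virtual becomes a hard compile error instead of a silently hidden function.

// Illustration only: the class and method names below are made up.
struct OperatorImplBase {
    virtual ~OperatorImplBase() = default;
    virtual void forward() = 0;
    virtual void backward() = 0;
};

struct MyImpl : OperatorImplBase {
    void forward() override {}          // OK: matches the base-class virtual
    void backward() override {}         // OK
    // void forward(int) override {}    // compile error: overrides nothing
};
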
......@@ -56,9 +56,9 @@ class AvgPoolingImpl2D_cpu : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -43,11 +43,11 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(attrs)[0] + std::get<2>(attrs)[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(attrs)[1] + std::get<2>(attrs)[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1])/
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
// TODO: kernel computation
......@@ -61,11 +61,11 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(std::get<2>(attrs)[0] - ox * std::get<0>(attrs)[0]);
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(std::get<2>(attrs)[1] - oy * std::get<0>(attrs)[1]);
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? std::get<1>(attrs)[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
......
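
The kernel changes above drop the padding terms from the output-size computation, since padding is now handled by the dedicated Pad operator added later in this diff. A small standalone sketch of the remaining formula, using illustrative values that are not taken from the diff:

#include <cmath>
#include <cstddef>
#include <cstdio>

int main() {
    // floor((inDim - kernel + stride) / stride), i.e. floor((inDim - kernel) / stride) + 1
    const std::size_t inH = 32, kernelH = 3, strideH = 2;   // hypothetical sizes
    const std::size_t oxSize = static_cast<std::size_t>(
        std::floor(static_cast<float>(inH - kernelH + strideH) / static_cast<float>(strideH)));
    std::printf("output height without padding: %zu\n", oxSize);  // prints 15
    return 0;
}
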
......@@ -71,9 +71,9 @@ class BatchNormImpl2D_cpu : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -58,9 +58,9 @@ class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -9,7 +9,7 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
......@@ -46,11 +46,11 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(attrs)[0] + std::get<4>(attrs)[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(attrs)[1] + std::get<4>(attrs)[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
// TODO: kernel computation
......@@ -67,16 +67,16 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(std::get<4>(attrs)[0] - ox * std::get<0>(attrs)[0]);
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(std::get<4>(attrs)[1] - oy * std::get<0>(attrs)[1]);
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]) - std::get<4>(attrs)[0];
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]) - std::get<4>(attrs)[1];
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
......
......@@ -58,9 +58,9 @@ class ConvImpl2D_cpu : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -88,11 +88,11 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(attrs)[0] + std::get<5>(attrs)[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(attrs)[1] + std::get<5>(attrs)[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
// TODO: kernel computation
......@@ -110,16 +110,16 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(std::get<5>(attrs)[0] - ox * std::get<0>(attrs)[0]);
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(std::get<5>(attrs)[1] - oy * std::get<0>(attrs)[1]);
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]) - std::get<5>(attrs)[0];
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]) - std::get<5>(attrs)[1];
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
......
......@@ -51,9 +51,9 @@ class FCImpl_cpu : public OperatorImpl {
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -50,9 +50,9 @@ class LeakyReLUImpl_cpu : public OperatorImpl {
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -64,8 +64,8 @@ public:
void updateConsummerProducer() override final;
void forward();
void backward();
void forward() override;
void backward() override;
};
namespace {
......
......@@ -56,9 +56,9 @@ class MaxPoolingImpl2D_cpu : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -42,15 +42,14 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs,
std::array<DimSize_t, 2> strideDims = std::get<0>(attrs);
std::array<DimSize_t, 2> kernelDims = std::get<1>(attrs);
std::array<DimSize_t, 4> paddingDims = std::get<2>(attrs);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + paddingDims[0] + paddingDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + paddingDims[1] + paddingDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
......@@ -64,11 +63,11 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs,
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(paddingDims[0] - ox * strideDims[0]);
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(paddingDims[1] - oy * strideDims[1]);
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
......
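
In the pooling and convolution loops above, difx/dify lose their padding offset and are therefore never positive, so sxMin/syMin always clamp to 0; and as long as the input is at least as large as the kernel, the window end computed from them always equals the kernel size. A standalone sketch of that bound computation, assuming signedsize is a signed index type such as std::ptrdiff_t:

#include <algorithm>
#include <cstddef>

using signedsize = std::ptrdiff_t;  // assumption: mirrors Aidge's signedsize typedef

// End of the valid kernel window along one axis, with no padding applied.
std::size_t windowEnd(std::size_t inDim, std::size_t ox,
                      std::size_t stride, std::size_t kernel) {
    const signedsize dif = -static_cast<signedsize>(ox * stride);       // always <= 0
    const signedsize remaining = static_cast<signedsize>(inDim) + dif;  // rows left in the input
    if (remaining < 0) return 0;
    return std::min<std::size_t>(static_cast<std::size_t>(remaining), kernel);
}

// With inDim = 32, kernel = 3, stride = 2, the last output index is 14:
// windowEnd(32, 14, 2, 3) == 3, so the window is never clipped once padding is gone.
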
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_PADIMPL_H_
#define AIDGE_CPU_OPERATOR_PADIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Pad.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// class Pad_Op;
// compute kernel registry for forward and backward
class PadImpl2DForward_cpu
: public Registrable<PadImpl2DForward_cpu,
std::tuple<DataType, DataType>,
void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
void *)> {};
class PadImpl2DBackward_cpu
: public Registrable<PadImpl2DBackward_cpu,
std::tuple<DataType, DataType>,
void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
void *)> {};
class PadImpl2D_cpu : public OperatorImpl {
private:
const Pad_Op<2> &mOp;
std::array<NbElts_t, 1> mNbConsumedData = {0};
std::array<NbElts_t, 1> mNbProducedData = {0};
public:
PadImpl2D_cpu(const Pad_Op<2> &op) : mOp(op) {}
static std::unique_ptr<PadImpl2D_cpu> create(const Pad_Op<2> &op) {
return std::make_unique<PadImpl2D_cpu>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override;
void backward() override;
};
namespace {
// add cpu backend to Pad_Op<2> implementation registry
static Registrar<Pad_Op<2>> registrarPadImpl2D_cpu("cpu", Aidge::PadImpl2D_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */
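
The new PadImpl.hpp above only declares the scheduler-facing interface; the actual kernel is selected through the PadImpl2DForward_cpu registry. A hedged sketch of how PadImpl2D_cpu::forward() might dispatch, patterned on the other *Impl_cpu classes in this backend; the accessor names (getInput, dataType, getStaticAttributes, dims<4>, getImpl()->rawPtr, Registrar::create) are assumptions, not taken from this commit.

#include <cassert>
#include <memory>

#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/backend/cpu/operator/PadImpl_forward_kernel.hpp"

void Aidge::PadImpl2D_cpu::forward() {
    assert(mOp.getInput(0) && "missing input #0");

    // Select the kernel registered for this (input, output) data-type pair.
    auto kernelFunc = Registrar<PadImpl2DForward_cpu>::create(
        {mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});

    // Run it on the raw buffers.
    kernelFunc(mOp.getStaticAttributes(),
               std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
               mOp.getInput(0)->getImpl()->rawPtr(),
               mOp.getOutput(0)->getImpl()->rawPtr());
}
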
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/utils/Types.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 2D Padding on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param attrs tuple of Parameters from the Operator
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void PadImpl2D_cpu_forward_kernel(const Pad_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
const void *input_, void *output_)
{
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
const std::size_t oySize = dims[2] + std::get<0>(attrs)[0][0] + std::get<0>(attrs)[0][1];
const std::size_t oxSize = dims[3] + std::get<0>(attrs)[1][0] + std::get<0>(attrs)[1][1];
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
for (unsigned int oy = 0; oy < oySize; ++oy) {
for (unsigned int ox = 0; ox < oxSize; ++ox) {
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
O outputValue = std::get<2>(attrs);
if (std::get<1>(attrs) == PadBorderType::Constant) {
int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1][1]);
int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[0][1]);
if (ix >= 0 && ix < static_cast<int>(dims[3]) && iy >= 0 && iy < static_cast<int>(dims[2])) {
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
}
}
else if (std::get<1>(attrs) == PadBorderType::Replicate) {
int ix = std::max(0, std::min(static_cast<int>(dims[3]) - 1, static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1][1])));
int iy = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[0][1])));
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
}
else if (std::get<1>(attrs) == PadBorderType::Reflect) {
int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1][1]);
int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[0][1]);
if (ix < 0)
ix = 0 - ix;
if (iy < 0)
iy = 0 - iy;
if (ix >= static_cast<int>(dims[3]))
ix = static_cast<int>(dims[3]) - ix;
if (iy >= static_cast<int>(dims[2]))
iy = static_cast<int>(dims[2]) - iy;
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
}
else if (std::get<1>(attrs) == PadBorderType::Wrap) {
int ix = (static_cast<int>(dims[3]) + static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1][1])) % static_cast<int>(dims[3]);
int iy = (static_cast<int>(dims[2]) + static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[0][1])) % static_cast<int>(dims[2]);
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
}
output[oIndexFull] = outputValue;
}
}
}
}
}
namespace {
static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32},
Aidge::PadImpl2D_cpu_forward_kernel<float, float>);
static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::PadImpl2D_cpu_forward_kernel<int, int>);
static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::PadImpl2D_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ */
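
For the Constant border mode above, each output coordinate maps back to the input coordinate minus the leading border, and anything that falls outside the input keeps the fill value. A simplified one-dimensional sketch of that mapping, independent of the Aidge types:

#include <cstdio>
#include <vector>

// 1-D constant padding: padBefore copies of `value` in front, padAfter behind.
std::vector<float> pad1dConstant(const std::vector<float>& in,
                                 int padBefore, int padAfter, float value) {
    std::vector<float> out(in.size() + padBefore + padAfter, value);
    for (int ox = 0; ox < static_cast<int>(out.size()); ++ox) {
        const int ix = ox - padBefore;                       // map output -> input
        if (ix >= 0 && ix < static_cast<int>(in.size())) {   // inside the original data?
            out[ox] = in[ix];
        }
    }
    return out;
}

int main() {
    const auto out = pad1dConstant({1.f, 2.f, 3.f}, /*padBefore=*/1, /*padAfter=*/2, 0.f);
    for (float v : out) std::printf("%g ", v);  // prints: 0 1 2 3 0 0
    std::printf("\n");
    return 0;
}
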
......@@ -39,9 +39,9 @@ class ProducerImpl_cpu : public OperatorImpl {
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -50,9 +50,9 @@ class ReLUImpl_cpu : public OperatorImpl {
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......
......@@ -54,9 +54,9 @@ class ScalingImpl_cpu : public OperatorImpl {
void updateConsummerProducer() override final;
void forward();
void forward() override;
void backward();
void backward() override;
};
namespace {
......