Skip to content
Snippets Groups Projects
Commit c1f1e990 authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Merged with dev

parents 963989a0 5e85b482
No related branches found
No related tags found
2 merge requests!50version 0.2.0,!37Support for recurrent networks
Pipeline #39491 failed
Showing
with 609 additions and 350 deletions
...@@ -136,6 +136,8 @@ build:ubuntu_python: ...@@ -136,6 +136,8 @@ build:ubuntu_python:
- source venv/bin/activate - source venv/bin/activate
- python3 -m pip install -r requirements.txt - python3 -m pip install -r requirements.txt
- python3 -m pip install . - python3 -m pip install .
- python3 -m pip install numpy unittest-xml-reporting
- python3 -m pip list
artifacts: artifacts:
expire_in: 1 week expire_in: 1 week
paths: paths:
...@@ -160,9 +162,10 @@ build:ubuntu_python: ...@@ -160,9 +162,10 @@ build:ubuntu_python:
# script: # script:
# # Download dependencies # # Download dependencies
# # aidge_core # # aidge_core
# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip' # - $DEPENDENCY_NAME="aidge_core"
# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force # - $DEPENDENCY_JOB="build:windows_cpp"
# - Remove-Item .\build_cpp\ -Recurse # - !reference [.download_dependency_windows, script]
# - Remove-Item .\build_cpp\ -Recurse -Force -ErrorAction Ignore
# - $env:CMAKE_PREFIX_PATH = '../install_cpp' # - $env:CMAKE_PREFIX_PATH = '../install_cpp'
# - mkdir -p build_cpp # - mkdir -p build_cpp
...@@ -196,8 +199,9 @@ build:ubuntu_python: ...@@ -196,8 +199,9 @@ build:ubuntu_python:
# script: # script:
# # Download dependencies # # Download dependencies
# # aidge_core (Python) # # aidge_core (Python)
# - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip' # - $DEPENDENCY_NAME="aidge_core"
# - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force # - $DEPENDENCY_JOB="build:windows_python"
# - !reference [.download_dependency_windows, script]
# - python -m pip install virtualenv # - python -m pip install virtualenv
# - virtualenv venv # - virtualenv venv
......
...@@ -18,9 +18,8 @@ test:ubuntu_python: ...@@ -18,9 +18,8 @@ test:ubuntu_python:
script: script:
- source venv/bin/activate - source venv/bin/activate
- cd ${CI_PROJECT_NAME} - cd ${CI_PROJECT_NAME}
- python3 -m pip install numpy unittest-xml-reporting
- python3 -m pip list # Run on discovery all tests located in core/unit_tests/python and discard the stdout
# Run on discovery all tests located in core/unit_tests/python and discard the stdout
# only to show the errors/warnings and the results of the tests # only to show the errors/warnings and the results of the tests
- python3 -m xmlrunner discover -s unit_tests/ -v -b --output-file xmlrunner-results.xml - python3 -m xmlrunner discover -s unit_tests/ -v -b --output-file xmlrunner-results.xml
artifacts: artifacts:
......
import unittest
import aidge_core
import aidge_backend_cpu
import numpy as np
class test_tensor(unittest.TestCase):
    """Tests for the aidge_core.Tensor python bindings.

    Covers backend discovery, numpy -> Tensor conversion for several
    dtypes, the Tensor -> numpy round trip, and element get/set access.
    """

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_getavailable_backends(self):
        # The CPU backend registers itself when aidge_backend_cpu is imported.
        backends = aidge_core.Tensor.get_available_backends()
        self.assertTrue("cpu" in backends)

    def test_numpy_int_to_tensor(self):
        arr = np.arange(9).reshape(1, 1, 3, 3).astype(np.int32)
        # numpy -> Tensor
        tensor = aidge_core.Tensor(arr)
        self.assertEqual(tensor.dtype(), aidge_core.DataType.Int32)
        for got, expected in zip(tensor, arr.flatten()):
            self.assertTrue(got == expected)
        for got_dim, expected_dim in zip(tensor.dims(), arr.shape):
            self.assertEqual(got_dim, expected_dim)

    def test_tensor_int_to_numpy(self):
        arr = np.arange(9).reshape(1, 1, 3, 3)
        # numpy -> Tensor
        tensor = aidge_core.Tensor(arr)
        # Tensor -> numpy
        round_trip = np.array(tensor)
        for got, expected in zip(round_trip.flatten(), arr.flatten()):
            self.assertTrue(got == expected)
        for got_dim, expected_dim in zip(tensor.dims(), round_trip.shape):
            self.assertEqual(got_dim, expected_dim)

    def test_numpy_int64_to_tensor(self):
        arr = np.arange(9).reshape(1, 1, 3, 3).astype(np.int64)
        # numpy -> Tensor
        tensor = aidge_core.Tensor(arr)
        self.assertEqual(tensor.dtype(), aidge_core.DataType.Int64)
        for got, expected in zip(tensor, arr.flatten()):
            self.assertTrue(got == expected)
        for got_dim, expected_dim in zip(tensor.dims(), arr.shape):
            self.assertEqual(got_dim, expected_dim)

    def test_numpy_float_to_tensor(self):
        # Default-constructed tensor, immediately replaced below (kept to
        # mirror the original test's exercise of the default constructor).
        tensor = aidge_core.Tensor()
        arr = np.random.rand(1, 1, 3, 3).astype(np.float32)
        # numpy -> Tensor
        tensor = aidge_core.Tensor(arr)
        self.assertEqual(tensor.dtype(), aidge_core.DataType.Float32)
        for got, expected in zip(tensor, arr.flatten()):
            # TODO: may need to compare with a tolerance instead of ==
            self.assertTrue(got == expected)
        for got_dim, expected_dim in zip(tensor.dims(), arr.shape):
            self.assertEqual(got_dim, expected_dim)

    def test_get_set(self):
        dims = [2, 2, 2]
        arr = np.arange(8).reshape(dims).astype(np.int32)
        # numpy -> Tensor
        tensor = aidge_core.Tensor(arr)
        for idx in range(8):
            self.assertEqual(tensor[idx], idx)
            tensor[idx] = 5
            self.assertEqual(tensor[idx], 5)
if __name__ == '__main__':
unittest.main()
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
#ifndef AIDGE_CPU_IMPORTS_H_ #ifndef AIDGE_CPU_IMPORTS_H_
#define AIDGE_CPU_IMPORTS_H_ #define AIDGE_CPU_IMPORTS_H_
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
...@@ -21,7 +20,9 @@ ...@@ -21,7 +20,9 @@
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/GatherImpl.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
#include "aidge/backend/cpu/operator/MemorizeImpl.hpp" #include "aidge/backend/cpu/operator/MemorizeImpl.hpp"
...@@ -29,7 +30,9 @@ ...@@ -29,7 +30,9 @@
#include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/backend/cpu/operator/PopImpl.hpp" #include "aidge/backend/cpu/operator/PopImpl.hpp"
#include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl.hpp"
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
#include "aidge/backend/cpu/operator/SigmoidImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
#include "aidge/backend/cpu/operator/SliceImpl.hpp" #include "aidge/backend/cpu/operator/SliceImpl.hpp"
...@@ -37,5 +40,9 @@ ...@@ -37,5 +40,9 @@
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl.hpp"
#include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp"
#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#endif /* AIDGE_CPU_IMPORTS_H_ */
#endif /* AIDGE_CPU_IMPORTS_H_ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_GETCPUPTR_H_
#define AIDGE_CPU_DATA_GETCPUPTR_H_
#include "aidge/data/Tensor.hpp"
namespace Aidge {
// Returns the raw host pointer backing a Data object on the CPU backend,
// already adjusted by the tensor's implementation offset.
// NOTE: static_pointer_cast assumes `data` actually is a Tensor (unchecked).
inline void *getCPUPtr(std::shared_ptr<Aidge::Data> const &data) {
  const std::shared_ptr<Tensor> tensorPtr = std::static_pointer_cast<Tensor>(data);
  const auto offset = tensorPtr->getImplOffset();
  return tensorPtr->getImpl()->hostPtr(offset);
}
} // namespace Aidge
#endif // AIDGE_CPU_DATA_GETCPUPTR_H_
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_
#define AIDGE_CPU_DATA_TENSORIMPL_H_
#include <cstdint>

#include "aidge/backend/TensorImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/utils/future_std/span.hpp"
namespace Aidge {
/**
 * @brief CPU (host memory) implementation of TensorImpl.
 *
 * The element storage is a future_std::span<T> (mData). The buffer is either
 * owned by this instance (mDataOwner, allocated lazily on first non-const
 * pointer access) or wraps an externally provided pointer set through
 * setRawPtr(), in which case mDataOwner stays empty.
 *
 * @tparam T element type of the tensor.
 */
template <class T>
class TensorImpl_cpu : public TensorImpl {
private:
    /// Pointer to the data and its capacity
    future_std::span<T> mData;

    /// If this instance own the data, std::unique_ptr manages it
    std::unique_ptr<T[]> mDataOwner;

public:
    static constexpr const char *Backend = "cpu";

    TensorImpl_cpu(DeviceIdx_t device, NbElts_t length) : TensorImpl(Backend, device, length) {}

    /// Element-wise equality over the first mNbElts elements.
    /// NOTE(review): reinterpret_cast assumes otherImpl is a TensorImpl_cpu<T>
    /// with the same element type -- not checked at runtime; confirm callers
    /// guarantee this.
    bool operator==(const TensorImpl &otherImpl) const override final {
        const auto& typedOtherImpl = reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl);
        AIDGE_INTERNAL_ASSERT(typedOtherImpl.size() >= mNbElts);

        std::size_t i = 0;
        // Scan until the first mismatching element (or the end); the tensors
        // compare equal iff the scan reached mNbElts.
        for (; i < mNbElts &&
               *static_cast<const T*>(rawPtr(i)) == *static_cast<const T*>(typedOtherImpl.rawPtr(i));
               ++i) {
        }
        return i == mNbElts;
    }

    /// Factory hooked into the Tensor registrar (see the anonymous namespace
    /// at the bottom of this header).
    static std::shared_ptr<TensorImpl_cpu> create(DeviceIdx_t device, NbElts_t length) {
        return std::make_shared<TensorImpl_cpu<T>>(device, length);
    }

    /// Size in bytes of a single element.
    inline std::size_t scalarSize() const noexcept override final { return sizeof(T); }

    /// Copies `length` elements of type T from `src` to element `offset` of
    /// this buffer. Overlapping source/destination ranges are rejected.
    void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
        const T* srcT = static_cast<const T *>(src);
        // rawPtr() lazily allocates the owned buffer if needed.
        T* dstT = static_cast<T *>(rawPtr(offset));

        // NOTE(review): with `||` the check passes whenever length <= mNbElts,
        // even if the currently bound span is smaller -- confirm whether a
        // single bound (or `&&`) was intended. Same pattern below.
        AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
        AIDGE_ASSERT(dstT < srcT || dstT >= srcT + length, "overlapping copy is not supported");
        std::copy(srcT, srcT + length, dstT);
    }

    /// Copies `length` elements from `src`, converting each element from the
    /// source data type `srcDt` to T. Throws/aborts on unsupported types.
    void copyCast(const void *src, const DataType srcDt, NbElts_t length, NbElts_t offset = 0) override final {
        if (length == 0) {
            return;
        }

        T* dstT = static_cast<T *>(rawPtr(offset));
        AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
        // One std::copy per source type: the element-wise conversion to T is
        // done implicitly by the assignment inside std::copy.
        switch (srcDt)
        {
            case DataType::Float64:
                std::copy(static_cast<const double*>(src), static_cast<const double*>(src) + length,
                        dstT);
                break;
            case DataType::Float32:
                std::copy(static_cast<const float*>(src), static_cast<const float*>(src) + length,
                        dstT);
                break;
            case DataType::Float16:
                std::copy(static_cast<const half_float::half*>(src), static_cast<const half_float::half*>(src) + length,
                        dstT);
                break;
            case DataType::Int64:
                std::copy(static_cast<const int64_t*>(src), static_cast<const int64_t*>(src) + length,
                        dstT);
                break;
            case DataType::UInt64:
                std::copy(static_cast<const uint64_t*>(src), static_cast<const uint64_t*>(src) + length,
                        dstT);
                break;
            case DataType::Int32:
                std::copy(static_cast<const int32_t*>(src), static_cast<const int32_t*>(src) + length,
                        dstT);
                break;
            case DataType::UInt32:
                std::copy(static_cast<const uint32_t*>(src), static_cast<const uint32_t*>(src) + length,
                        dstT);
                break;
            case DataType::Int16:
                std::copy(static_cast<const int16_t*>(src), static_cast<const int16_t*>(src) + length,
                        dstT);
                break;
            case DataType::UInt16:
                std::copy(static_cast<const uint16_t*>(src), static_cast<const uint16_t*>(src) + length,
                        dstT);
                break;
            case DataType::Int8:
                std::copy(static_cast<const int8_t*>(src), static_cast<const int8_t*>(src) + length,
                        dstT);
                break;
            case DataType::UInt8:
                std::copy(static_cast<const uint8_t*>(src), static_cast<const uint8_t*>(src) + length,
                        dstT);
                break;
            default:
                AIDGE_THROW_OR_ABORT(std::runtime_error, "Unsupported data type.");
                break;
        }
    }

    /// "Device" copy for the CPU backend: only cpu/device 0 is valid, so this
    /// degenerates to a plain host copy.
    void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) override final {
        AIDGE_ASSERT(device.first == Backend, "backend must match");
        AIDGE_ASSERT(device.second == 0, "device cannot be != 0 for CPU backend");
        copy(src, length, offset);
    }

    /// Host memory is the native memory space here, so this is a plain copy.
    inline void copyFromHost(const void *src, NbElts_t length, NbElts_t offset = 0) override final {
        copy(src, length, offset);
    }

    /// Copies `length` elements starting at element `offset` into `dst`
    /// (assumed to point to at least `length` elements of T).
    void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const override final {
        const T* src = static_cast<const T*>(rawPtr(offset));
        AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "copy length is above capacity");
        std::copy(src, src + length, static_cast<T *>(dst));
    }

    /// Mutable raw pointer at element `offset`; allocates the owned buffer
    /// on first use.
    void *rawPtr(NbElts_t offset = 0) override final {
        lazyInit();
        return (mData.data() + offset);
    };

    /// Const raw pointer; cannot allocate, so the buffer must already be
    /// large enough.
    const void *rawPtr(NbElts_t offset = 0) const override final {
        AIDGE_ASSERT(mData.size() >= mNbElts, "accessing uninitialized const rawPtr");
        return (mData.data() + offset);
    };

    /// For the CPU backend the host pointer is the raw pointer.
    void *hostPtr(NbElts_t offset = 0) override final {
        lazyInit();
        return (mData.data() + offset);
    };

    const void *hostPtr(NbElts_t offset = 0) const override final {
        AIDGE_ASSERT(mData.size() >= mNbElts, "accessing uninitialized const hostPtr");
        return (mData.data() + offset);
    };

    /// Wraps an externally owned buffer of `length` elements. Any previously
    /// owned buffer is released; ownership of `ptr` stays with the caller.
    void setRawPtr(void *ptr, NbElts_t length) override final {
        AIDGE_ASSERT(length >= mNbElts, "trying to set raw pointer of insufficient capacity");
        mData = future_std::span<T>(static_cast<T *>(ptr), length);
        mDataOwner.reset();
    };

    virtual ~TensorImpl_cpu() = default;

private:
    /// Allocates (or re-allocates) the owned buffer when the current span is
    /// too small for mNbElts elements. Existing contents are NOT preserved
    /// across a re-allocation.
    void lazyInit() {
        if (mData.size() < mNbElts) {
            // Need more data, a re-allocation will occur
            AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "trying to enlarge non-owned data");
            mDataOwner.reset(new T[mNbElts]);
            mData = future_std::span<T>(mDataOwner.get(), mNbElts);
        }
    }
};
namespace {
// Register the CPU tensor implementation for each supported data type.
// Fixed-width integer types are used so the registered element size matches
// the DataType tag on every platform: plain `long` is only 32 bits on
// LLP64 targets (e.g. 64-bit Windows), which would silently break the
// Int64 registration there.
static Registrar<Tensor> registrarTensorImpl_cpu_Float64(
        {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float32(
        {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Float16(
        {"cpu", DataType::Float16}, Aidge::TensorImpl_cpu<half_float::half>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
        {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<std::int32_t>::create);
static Registrar<Tensor> registrarTensorImpl_cpu_Int64(
        {"cpu", DataType::Int64}, Aidge::TensorImpl_cpu<std::int64_t>::create);
}  // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Erf_Op;

// compute kernel registry for forward and backward
// Registrable key: (input DataType, output DataType).
// Kernel signature: (element count, input buffer, output buffer).
class ErfImplForward_cpu
    : public Registrable<ErfImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class ErfImplBackward_cpu
    : public Registrable<ErfImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};

// OperatorImpl for Erf on the CPU backend; forward() and
// getNbRequiredProtected() are declared here and defined out of line.
class ErfImpl_cpu : public OperatorImpl {
public:
    ErfImpl_cpu(const Erf_Op& op) : OperatorImpl(op) {}

    static std::unique_ptr<ErfImpl_cpu> create(const Erf_Op& op) {
        return std::make_unique<ErfImpl_cpu>(op);
    }

    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};

namespace {
// Self-registration of the CPU implementation for Erf_Op under the "cpu" key.
static Registrar<Erf_Op> registrarErfImpl_cpu("cpu", Aidge::ErfImpl_cpu::create);
}
}  // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_
#include <cmath>
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
namespace Aidge {
/**
 * @brief Element-wise error function kernel: output[i] = erf(input[i]).
 *
 * @tparam I input element type.
 * @tparam O output element type (the std::erf result is converted to O on
 *           assignment).
 * @param inputLenght number of elements to process.
 * @param input_ pointer to the input buffer.
 * @param output_ pointer to the (pre-allocated) output buffer.
 */
template <class I, class O>
void ErfImpl_cpu_forward_kernel(std::size_t inputLenght,
                                const void* input_,
                                void* output_) {
    const I* const in = static_cast<const I*>(input_);
    O* const out = static_cast<O*>(output_);

    for (std::size_t idx = 0; idx < inputLenght; ++idx) {
        out[idx] = std::erf(in[idx]);
    }
}
namespace {
// Register the erf forward kernel for each supported (input, output) pair.
// NOTE(review): for the Int32 instantiation, std::erf returns a double in
// (-1, 1), so the truncated integer result is 0 for every input -- confirm
// this registration is intentional.
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::ErfImpl_cpu_forward_kernel<float, float>);
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::ErfImpl_cpu_forward_kernel<int, int>);
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::ErfImpl_cpu_forward_kernel<double, double>);
}  // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GATHERIMPL_H_
#define AIDGE_CPU_OPERATOR_GATHERIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Gather.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Gather_Op;

// compute kernel registry for forward and backward
// Registrable key: (input DataType, output DataType).
// Kernel signature: (operator attributes, input dims, input buffer, output buffer).
class GatherImplForward_cpu
    : public Registrable<GatherImplForward_cpu, std::tuple<DataType, DataType>, void(const typename Gather_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> {
};
class GatherImplBackward_cpu
    : public Registrable<GatherImplBackward_cpu, std::tuple<DataType, DataType>, void(const typename Gather_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> {
};

// OperatorImpl for Gather on the CPU backend; forward() and
// getNbRequiredProtected() are declared here and defined out of line.
class GatherImpl_cpu : public OperatorImpl {
public:
    GatherImpl_cpu(const Gather_Op& op) : OperatorImpl(op) {}

    static std::unique_ptr<GatherImpl_cpu> create(const Gather_Op& op) {
        return std::make_unique<GatherImpl_cpu>(op);
    }

    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};

namespace {
// Self-registration of the CPU implementation for Gather_Op under "cpu".
static Registrar<Gather_Op> registrarGatherImpl_cpu("cpu", Aidge::GatherImpl_cpu::create);
}
}  // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GATHERIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/GatherImpl.hpp"
namespace Aidge {
/**
 * @brief Gather forward kernel for the CPU backend.
 *
 * For every "pre-axis" slice of the input, copies the sub-slices selected by
 * the `indices` attribute along the gather axis into the contiguous output
 * buffer.
 *
 * @tparam I input element type.
 * @tparam O output element type (written via std::copy_n, so it must be
 *           assignable from I).
 * @param attrs operator attributes; std::get<0>(attrs) is the index list and
 *              std::get<2>(attrs) the (possibly negative) gather axis.
 * @param inputDims dimensions of the input tensor.
 * @param input_ pointer to the input data.
 * @param output_ pointer to the (pre-allocated) output data.
 */
template <class I, class O>
void GatherImpl_cpu_forward_kernel(const typename Gather_Op::Attrs& attrs, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
{
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);

    // Normalize a negative axis: the unsigned wrap-around of the cast plus
    // inputDims.size() yields the same value as `axis + rank` computed in
    // signed arithmetic.
    const std::size_t axisIdx = std::get<2>(attrs)>=0 ?
                                    std::get<2>(attrs) :
                                    static_cast<std::size_t>(std::get<2>(attrs)) + inputDims.size();

    // Number of contiguous elements after the gather axis...
    std::size_t postAxisElems = 1;
    for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
        postAxisElems *= inputDims[i];
    }
    // ...and number of independent slices before it.
    std::size_t preAxisElems = 1;
    for (std::size_t i = 0; i < axisIdx; ++i) {
        preAxisElems *= inputDims[i];
    }

    // Bind the index list by reference: the previous code copied the whole
    // vector on every kernel invocation.
    const auto& indices = std::get<0>(attrs);
    for (std::size_t i = 0; i < preAxisElems; ++i)
    {
        for (std::size_t j = 0; j < indices.size(); ++j)
        {
            // Negative indices count from the end of the gathered axis
            // (same modular-arithmetic trick as for the axis above).
            const std::size_t idx = indices[j] >= 0 ? indices[j] : static_cast<std::size_t>(indices[j]) + inputDims[axisIdx];
            const I* startPtr = std::next(input, i * postAxisElems * inputDims[axisIdx] + idx * postAxisElems);
            std::copy_n(startPtr, postAxisElems, output);
            output += postAxisElems;
        }
    }
}
namespace {
// Register the gather forward kernel for each supported (input, output) pair.
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::GatherImpl_cpu_forward_kernel<float, float>);
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::GatherImpl_cpu_forward_kernel<int, int>);
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::GatherImpl_cpu_forward_kernel<double, double>);
}  // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_ */
...@@ -25,7 +25,7 @@ void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs, ...@@ -25,7 +25,7 @@ void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs,
const I* input = static_cast<const I*>(input_); const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_); O* output = static_cast<O*>(output_);
I negativeSlope = static_cast<I>(std::get<0>(attrs)); const I negativeSlope = static_cast<const I>(std::get<0>(attrs));
for (std::size_t i = 0; i < inputLenght; ++i) { for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope; output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
......
...@@ -23,16 +23,14 @@ ...@@ -23,16 +23,14 @@
#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge { namespace Aidge {
// class MatMul_Op;
// compute kernel registry for forward and backward
class MatMulImplForward_cpu class MatMulImplForward_cpu
: public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>, : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>,
void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, void(const std::size_t, const std::size_t, const std::size_t,
const void *, const void *, void *)> {}; const void *, const void *, void *)> {};
class MatMulImplBackward_cpu class MatMulImplBackward_cpu
: public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>,
void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
const void *, const void *, void *)> {}; const void *, const void *, void *)> {};
class MatMulImpl_cpu : public OperatorImpl { class MatMulImpl_cpu : public OperatorImpl {
......
...@@ -12,45 +12,39 @@ ...@@ -12,45 +12,39 @@
#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ #ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ #define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
namespace Aidge { namespace Aidge {
template <class I, class W, class O> template <class I, class O>
void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
const void* input_, const void* weights_, void* output_) { const void* input1_, const void* input2_, void* output_) {
// FIXME: missing MatMul parameters as arguments // FIXME: missing MatMul parameters as arguments
const I* input = static_cast<const I*>(input_); const I* input1 = static_cast<const I*>(input1_);
const W* weights = static_cast<const W*>(weights_); const I* input2 = static_cast<const I*>(input2_);
O* output = static_cast<O*>(output_); O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < n; ++i) {
std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0)); for (std::size_t j = 0; j < m; ++j) {
O sum = O(0);
for (std::size_t batch = 0; batch < batchSize; ++batch) { for (std::size_t l = 0; l < k; ++l) {
for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { sum += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, }
input + (batch + 1)*oneInputSize, output[i*m + j] = sum;
weights + out*oneInputSize,
output[out + batch*std::get<0>(attrs)]);
} }
} }
} }
namespace { namespace {
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32( static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32}, {DataType::Float32, DataType::Float32},
Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>); Aidge::MatMulImpl_cpu_forward_kernel<float, float>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32( static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32}, {DataType::Int32, DataType::Int32},
Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>); Aidge::MatMulImpl_cpu_forward_kernel<int, int>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64( static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64}, {DataType::Float64, DataType::Float64},
Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>); Aidge::MatMulImpl_cpu_forward_kernel<double, double>);
} // namespace } // namespace
} // namespace Aidge } // namespace Aidge
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// class ReduceMean_Op;

// compute kernel registry for forward and backward
// One Registrable per supported number of reduction axes (DIM = 1, 2, 3).
// Registrable key: (input DataType, output DataType).
// Kernel signature: (operator attributes, input dims, input buffer, output buffer).

// DIM 1
class ReduceMeanImpl1DForward_cpu
    : public Registrable<ReduceMeanImpl1DForward_cpu,
                        std::tuple<DataType, DataType>,
                        void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl1DBackward_cpu
    : public Registrable<ReduceMeanImpl1DBackward_cpu,
                        std::tuple<DataType, DataType>,
                        void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};

// DIM 2
class ReduceMeanImpl2DForward_cpu
    : public Registrable<ReduceMeanImpl2DForward_cpu,
                        std::tuple<DataType, DataType>,
                        void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl2DBackward_cpu
    : public Registrable<ReduceMeanImpl2DBackward_cpu,
                        std::tuple<DataType, DataType>,
                        void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};

// DIM 3
class ReduceMeanImpl3DForward_cpu
    : public Registrable<ReduceMeanImpl3DForward_cpu,
                        std::tuple<DataType, DataType>,
                        void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl3DBackward_cpu
    : public Registrable<ReduceMeanImpl3DBackward_cpu,
                        std::tuple<DataType, DataType>,
                        void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};

// OperatorImpl for ReduceMean over 1 axis; forward() and
// getNbRequiredProtected() are declared here and defined out of line.
class ReduceMeanImpl1D_cpu : public OperatorImpl {
public:
    ReduceMeanImpl1D_cpu(const ReduceMean_Op<1>& op) : OperatorImpl(op) {}

    static std::unique_ptr<ReduceMeanImpl1D_cpu> create(const ReduceMean_Op<1> &op) {
        return std::make_unique<ReduceMeanImpl1D_cpu>(op);
    }

public:
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};

// OperatorImpl for ReduceMean over 2 axes.
class ReduceMeanImpl2D_cpu : public OperatorImpl {
public:
    ReduceMeanImpl2D_cpu(const ReduceMean_Op<2>& op) : OperatorImpl(op) {}

    static std::unique_ptr<ReduceMeanImpl2D_cpu> create(const ReduceMean_Op<2> &op) {
        return std::make_unique<ReduceMeanImpl2D_cpu>(op);
    }

public:
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};

// OperatorImpl for ReduceMean over 3 axes.
class ReduceMeanImpl3D_cpu : public OperatorImpl {
public:
    ReduceMeanImpl3D_cpu(const ReduceMean_Op<3>& op) : OperatorImpl(op) {}

    static std::unique_ptr<ReduceMeanImpl3D_cpu> create(const ReduceMean_Op<3> &op) {
        return std::make_unique<ReduceMeanImpl3D_cpu>(op);
    }

public:
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};

namespace {
// add cpu backend to ReduceMean_Op<DIM> implementation registry (DIM = 1, 2, 3)
static Registrar<ReduceMean_Op<1>> registrarReduceMeanImpl1D_cpu("cpu", Aidge::ReduceMeanImpl1D_cpu::create);
static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create);
static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create);
}  // namespace
}  // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
#include <cstddef>
#include <algorithm> // std::copy, std::for_each
#include <numeric> //std::accumulate
#include <functional> //std::multiplies
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
/**
 * @brief CPU forward kernel for ReduceMean: averages the input tensor over the
 *        axes stored in @p attrs (attribute #0) and writes the result to @p output_.
 *
 * @tparam I   input scalar type.
 * @tparam O   output scalar type.
 * @tparam DIM number of axes to reduce.
 * @param attrs     operator attributes; std::get<0>(attrs) holds the DIM axes to reduce.
 * @param inputDims dimensions of the input tensor.
 * @param input_    type-erased pointer to the input buffer (read as I*).
 * @param output_   type-erased pointer to the output buffer (written as O*).
 *
 * NOTE(review): the multi-axis path updates stride_pre after each collapsed axis,
 * which presumably requires the axes to be listed in increasing order — TODO confirm
 * against the operator's canonical attribute form.
 */
template <class I, class O, DimSize_t DIM>
void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs& attrs,
                     const std::vector<DimSize_t>& inputDims,
                     const void* input_,
                     void* output_) {

    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);

    const std::size_t nb_dims = inputDims.size();
    // std::size_t{1} as the init value keeps the accumulation in std::size_t
    // (an int init would overflow on large tensors).
    const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(),
                                                      std::size_t{1}, std::multiplies<std::size_t>());

    if (DIM == 1) {
        // Single reduced axis: accumulate directly into the output, no temporaries.
        const std::size_t axis = static_cast<std::size_t>(std::get<0>(attrs)[0]);
        const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axis,
                                                       std::size_t{1}, std::multiplies<std::size_t>());
        const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims - 1 - axis,
                                                        std::size_t{1}, std::multiplies<std::size_t>());

        const std::size_t dim_i = inputDims[axis];
        for (std::size_t pre = 0; pre < stride_pre; ++pre) {
            for (std::size_t post = 0; post < stride_post; ++post) {
                const std::size_t idx_i = pre * dim_i * stride_post + post;
                const std::size_t idx_o = pre * stride_post + post;
                output[idx_o] = input[idx_i];
                for (std::size_t i = 1; i < dim_i; ++i) {
                    output[idx_o] += input[idx_i + i*stride_post];
                }
                output[idx_o] /= dim_i;
            }
        }
    } else {
        std::size_t outputElements = totalElements;

        // stride_post[i]: number of elements spanned by one step along axis i.
        std::vector<std::size_t> stride_post(nb_dims);
        stride_post[nb_dims - 1] = 1;
        for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
            stride_post[i] = stride_post[i+1]*inputDims[i+1];
        }
        // stride_pre[i]: number of slices preceding axis i.
        std::vector<std::size_t> stride_pre(nb_dims);
        stride_pre[0] = 1;
        for (std::size_t i = 1; i < nb_dims; ++i) {
            stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
        }

        // Reduce one axis at a time; accumulationBuffer owns the intermediate
        // sums (RAII std::vector replaces the previous raw new[]/delete[]).
        const I* inputAccumulation = input;
        std::vector<I> accumulationBuffer;
        for (const auto& axisInt : std::get<0>(attrs)) {
            const std::size_t a = static_cast<std::size_t>(axisInt);
            outputElements /= inputDims[a];
            std::vector<I> nextBuffer(outputElements);
            const std::size_t dim_i = inputDims[a];
            for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
                for (std::size_t post = 0; post < stride_post[a]; ++post) {
                    const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
                    const std::size_t idx_o = pre * stride_post[a] + post;
                    nextBuffer[idx_o] = inputAccumulation[idx_i];
                    for (std::size_t i = 1; i < dim_i; ++i) {
                        nextBuffer[idx_o] += inputAccumulation[idx_i + i*stride_post[a]];
                    }
                }
            }
            // Axis `a` has collapsed: later axes now address a smaller tensor.
            std::for_each(stride_pre.begin()+a+1, stride_pre.end(), [dim_i] (std::size_t& val) { val /= dim_i; });

            accumulationBuffer.swap(nextBuffer);   // old intermediate freed when nextBuffer dies
            inputAccumulation = accumulationBuffer.data();
        }

        // Final pass: divide the accumulated sums by the total reduction size.
        // Fix: the previous lambda took `int element`, truncating floating-point
        // sums to int before the division (wrong Float32/Float64 results).
        const I divisor = static_cast<I>(totalElements / outputElements);
        std::transform(inputAccumulation, inputAccumulation + outputElements, output,
                       [divisor](const I& element) { return static_cast<O>(element / divisor); });
    }
}
namespace {
// Register the forward kernels for each supported (input, output) DataType pair
// and each number of reduced axes (1 to 3).
// DIM = 1
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,1>);
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,1>);
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,1>);
// DIM = 2
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,2>);
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,2>);
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,2>);
// DIM = 3
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,3>);
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,3>);
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,3>);
}  // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
#define AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Reshape.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// Kernel registry for the Reshape forward pass.
// Kernels are keyed on (input DataType, output DataType) and have signature
// (element count, raw input pointer, raw output pointer).
class ReshapeImplForward_cpu
    : public Registrable<ReshapeImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
};
// Kernel registry for the Reshape backward pass (same element-copy signature as forward).
class ReshapeImplBackward_cpu
    : public Registrable<ReshapeImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
};
/// CPU implementation of the Reshape operator (a plain element-wise copy;
/// the dimension change itself is metadata handled by the framework).
class ReshapeImpl_cpu : public OperatorImpl {
public:
    ReshapeImpl_cpu(const Reshape_Op& op) : OperatorImpl(op) {}

    /// Factory used by the backend registry to instantiate this implementation.
    static std::unique_ptr<ReshapeImpl_cpu> create(const Reshape_Op& op) {
        return std::make_unique<ReshapeImpl_cpu>(op);
    }

    /// Number of input elements that must stay valid while forward() runs.
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};
namespace {
// Register the "cpu" backend for the Reshape_Op implementation registry.
static Registrar<Reshape_Op> registrarReshapeImpl_cpu("cpu", Aidge::ReshapeImpl_cpu::create);
}  // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <cmath>
#include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
namespace Aidge {
/**
 * @brief Forward kernel for Reshape on CPU.
 *
 * A reshape never reorders data: the output is the input buffer copied
 * verbatim (element-wise converted from I to O); only the logical
 * dimensions change, on the caller's side.
 *
 * @param inputLength number of elements to copy.
 * @param input_  type-erased pointer to the source buffer (read as I*).
 * @param output_ type-erased pointer to the destination buffer (written as O*).
 */
template <class I, class O>
void ReshapeImpl_cpu_forward_kernel(std::size_t inputLength,
                                    const void* input_,
                                    void* output_) {
    const I* const src = static_cast<const I*>(input_);
    O* const dst = static_cast<O*>(output_);
    std::copy_n(src, inputLength, dst);
}
namespace {
// Register the Reshape forward kernels for each supported same-type
// (input DataType, output DataType) pair.
static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Float32(
        {DataType::Float32, DataType::Float32},
        Aidge::ReshapeImpl_cpu_forward_kernel<float, float>);
static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Int32(
        {DataType::Int32, DataType::Int32},
        Aidge::ReshapeImpl_cpu_forward_kernel<int, int>);
static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Float64(
        {DataType::Float64, DataType::Float64},
        Aidge::ReshapeImpl_cpu_forward_kernel<double, double>);
}  // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_ */
...@@ -35,7 +35,7 @@ void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs, ...@@ -35,7 +35,7 @@ void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs,
const std::int64_t axis_ = std::get<2>(attrs)[i]; const std::int64_t axis_ = std::get<2>(attrs)[i];
const std::int64_t start_ = std::get<0>(attrs)[i]; const std::int64_t start_ = std::get<0>(attrs)[i];
const std::int64_t end_ = std::get<1>(attrs)[i]; const std::int64_t end_ = std::get<1>(attrs)[i];
const std::size_t axis = axis_ >= 0 ? axis_ : static_cast<std::size_t>(axis_ + static_cast<std::int32_t>(inputDims.size())); const std::size_t axis = axis_ >= 0 ? axis_ : static_cast<std::size_t>(axis_) + inputDims.size();
const std::size_t start = start_ >= 0 ? start_ : start_ + inputDims[axis]; const std::size_t start = start_ >= 0 ? start_ : start_ + inputDims[axis];
const std::size_t end = end_ >= 0 ? end_ : end_ + inputDims[axis]; const std::size_t end = end_ >= 0 ? end_ : end_ + inputDims[axis];
std::size_t stride = 1; std::size_t stride = 1;
......
...@@ -25,10 +25,10 @@ namespace Aidge { ...@@ -25,10 +25,10 @@ namespace Aidge {
// compute kernel registry for forward and backward // compute kernel registry for forward and backward
class SoftmaxImplForward_cpu class SoftmaxImplForward_cpu
: public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, void(const DimSize_t, const DimSize_t, const DimSize_t, const void*, void*)> { : public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)> {
}; };
class SoftmaxImplBackward_cpu class SoftmaxImplBackward_cpu
: public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { : public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)> {
}; };
class SoftmaxImpl_cpu : public OperatorImpl { class SoftmaxImpl_cpu : public OperatorImpl {
......
...@@ -23,30 +23,33 @@ ...@@ -23,30 +23,33 @@
namespace Aidge { namespace Aidge {
template <class I, class O> template <class I, class O>
void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize, void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
const DimSize_t channelSize, {
const DimSize_t featureSize,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_); const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_); O* output = static_cast<O*>(output_);
for (std::size_t batch = 0; batch < batchSize; ++batch) { std::size_t postAxisElems = 1;
for (std::size_t feature = 0; feature < featureSize; ++feature) { for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
std::size_t ioIndex = batch*channelSize*featureSize + feature; postAxisElems *= inputDims[i];
}
std::size_t preAxisElems = 1;
for (std::size_t i = 0; i < axisIdx; ++i) {
preAxisElems *= inputDims[i];
}
I sum(0.0); for (std::size_t i = 0; i < preAxisElems; ++i) {
for (std::size_t ch = 0; ch < channelSize; ++ch) { for (std::size_t j = 0; j < postAxisElems; ++j) {
output[ioIndex] = std::exp(input[ioIndex]); // Calculate sum of exponentials within the axis
sum += output[ioIndex]; I sumExp = 0;
ioIndex+=featureSize; for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
sumExp += std::exp(input[inIdx]);
} }
ioIndex = batch*channelSize*featureSize + feature; // Calculate softmax for the current slice along the axis
for (std::size_t ch = 0; ch < channelSize; ++ch) { for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
output[ioIndex] /= sum; std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
ioIndex += featureSize; output[inIdx] = std::exp(input[inIdx]) / sumExp;
} }
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment