diff --git a/include/aidge/backend/QuantizationCPU.hpp b/include/aidge/backend/QuantizationCPU.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4338e04b421ccef1c990840cbc417a2fe7a6d4aa --- /dev/null +++ b/include/aidge/backend/QuantizationCPU.hpp @@ -0,0 +1,22 @@ +// /******************************************************************************** +// * Copyright (c) 2023 CEA-List +// * +// * This program and the accompanying materials are made available under the +// * terms of the Eclipse Public License 2.0 which is available at +// * http://www.eclipse.org/legal/epl-2.0. +// * +// * SPDX-License-Identifier: EPL-2.0 +// * +// ********************************************************************************/ +#ifndef AIDGE_QUANTIZATION_CPU_IMPORTS_H_ +#define AIDGE_QUANTIZATION_CPU_IMPORTS_H_ + +#include "aidge/backend/cpu/operator/FixedQImpl.hpp" +#include "aidge/backend/cpu/operator/LSQImpl.hpp" + +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp" + +// ... + +#endif /* AIDGE_QUANTIZATION_CPU_IMPORTS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/FixedQImpl.hpp b/include/aidge/backend/cpu/operator/FixedQImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..def38482cb958387f7504e946fff54b378508ed8 --- /dev/null +++ b/include/aidge/backend/cpu/operator/FixedQImpl.hpp @@ -0,0 +1,37 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FIXEDQIMPL_H_ +#define AIDGE_CPU_OPERATOR_FIXEDQIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/FixedQ.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +// Operator implementation entry point for the backend +using FixedQImpl_cpu = OperatorImpl_cpu<FixedQ_Op, + void(const std::size_t, const float, const bool, const std::size_t, const void*, void*), + void(const std::size_t, const float, const bool, const std::size_t, const void*, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(FixedQ_Op, "cpu", Aidge::FixedQImpl_cpu::create); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FIXEDQIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3e1f9d239fa31d29512a40674ef914486dfbd85a --- /dev/null +++ b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp @@ -0,0 +1,91 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FIXEDQIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_FIXEDQIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include "aidge/backend/cpu/operator/FixedQImpl.hpp" + +//#include <iostream> + +namespace Aidge { +template <class I, class O> +void FixedQImpl_cpu_forward_kernel( + std::size_t nbBits, + float span_, + bool isOutputUnsigned, + std::size_t inputLenght, + const void* input_, + void* output_) { + + I span = static_cast<I> (span_); + I stepSize = span / static_cast<I> (1 << (nbBits - 1)); + if (isOutputUnsigned) { + stepSize /= 2; + } + + const I upper = span - stepSize; + const I lower = isOutputUnsigned ? 0 : -span; + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + I clipped = std::max(lower, std::min(input[i], upper)); + output[i] = std::round(clipped / stepSize) * stepSize; + } + +} + +template <class I, class GI, class GO> +void FixedQImpl_cpu_backward_kernel( + std::size_t nbBits, + float span_, + bool isOutputUnsigned, + const std::size_t inputLenght, + const void* input_, + const void* grad_output_, + void* grad_input_) { + + I span = static_cast<I> (span_); + I stepSize = span / static_cast<I> (1 << (nbBits - 1)); + if (isOutputUnsigned) { + stepSize /= 2; + } + + const I upper = span - stepSize; + const I lower = isOutputUnsigned ? 0 : -span; + + const I* input = static_cast<const I*>(input_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + // Clipped Straight Through Estimator + grad_input[i] = ((input[i] > lower) && (input[i] < upper)) ? 
grad_output[i] : 0; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(FixedQImpl_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::FixedQImpl_cpu_forward_kernel<int, int>, Aidge::FixedQImpl_cpu_backward_kernel<int, int, int>}); +REGISTRAR(FixedQImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::FixedQImpl_cpu_forward_kernel<float, float>, Aidge::FixedQImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(FixedQImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::FixedQImpl_cpu_forward_kernel<double, double>, Aidge::FixedQImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FIXEDQIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LSQImpl.hpp b/include/aidge/backend/cpu/operator/LSQImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f2a7d927e3aa6841ef500763f882c5aff2d0261a --- /dev/null +++ b/include/aidge/backend/cpu/operator/LSQImpl.hpp @@ -0,0 +1,48 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LSQIMPL_H_ +#define AIDGE_CPU_OPERATOR_LSQIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/LSQ.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// compute kernel registry for forward and backward + +using LSQImpl_cpu = OperatorImpl_cpu<LSQ_Op, + void(const std::size_t, + std::pair<int, int>&, + const void*, + const void*, + void*), + void(const std::size_t, + std::pair<int, int>&, + const void*, + const void*, + const void*, + void*, + void*)>; + + +// Implementation entry point registration to Operator +REGISTRAR(LSQ_Op, "cpu", Aidge::LSQImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LSQIMPL_H_ */ + diff --git a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ddb820997837ec9b3603c6007497c8161145d587 --- /dev/null +++ b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp @@ -0,0 +1,137 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LSQIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_LSQIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/LSQImpl.hpp" + +namespace Aidge { +template <class I, class O> +void LSQImpl_cpu_forward_kernel(std::size_t inputLength, + const std::pair<int, int>& range, + const void* input_, + const void* stepSize_, + void* output_) +{ + const I* input = static_cast<const I*>(input_); + const I* stepSize = static_cast<const I*>(stepSize_); + O* output = static_cast<O*>(output_); + + const O bitRangesLowerBound = static_cast<O>(range.first * stepSize[0]); + const O bitRangesUpperBound = static_cast<O>(range.second * stepSize[0]); + +//#pragma omp parallel for if (inputLength > 16) + for (unsigned int i = 0; i < inputLength; i++) { + const O qData = input[i] / stepSize[0]; + + output[i] = + (qData <= static_cast<O>(range.first)) ? bitRangesLowerBound : + (qData >= static_cast<O>(range.second)) ? 
bitRangesUpperBound : + std::round(qData) * stepSize[0]; + } +} + +template <class I, class GI, class GO> +void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, + const std::pair<int, int>& range, + const void* input_, + const void* stepSize_, + const void* grad_output_, + void* grad_input_, + void* grad_stepSize_) +{ + const I* input = static_cast<const I*>(input_); + const I* stepSize = static_cast<const I*>(stepSize_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + GI* grad_stepSize = static_cast<GI*>(grad_stepSize_); + + GI diffStepSize = GI(0.0); + +#pragma omp parallel for schedule(static, 256) reduction(+:diffStepSize) if(inputLength > 16) + for(unsigned int i=0; i < inputLength / 4; i++) { + const GI fullPrecScale_1 = input[4*i] / stepSize[0]; + const GI fullPrecScale_2 = input[4*i+1] / stepSize[0]; + const GI fullPrecScale_3 = input[4*i+2] / stepSize[0]; + const GI fullPrecScale_4 = input[4*i+3] / stepSize[0]; + /*****************Features Gradient Computation********************/ + // STE method is simply applied + grad_input[4*i] = grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_1 >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + grad_input[4*i+1] = grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_2 >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + grad_input[4*i+2] = grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_3 >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + grad_input[4*i+3] = grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_4 >= static_cast<GI>(range.second)) ? 
GI(0.0) : + GI(1.0)); + + /*****************Step Size Gradient Computation******************/ + //1st: clip the gradient in interval [rangeMin, rangeMax] and take account of qError + GI qData_1 = fullPrecScale_1; + qData_1 = ((qData_1 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_1 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_1) - qData_1); + GI qData_2 = fullPrecScale_2; + qData_2 = ((qData_2 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_2 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_2) - qData_2); + GI qData_3 = fullPrecScale_3; + qData_3 = ((qData_3 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_3 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_3) - qData_3); + GI qData_4 = fullPrecScale_4; + qData_4 = ((qData_4 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_4 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_4) - qData_4); + //2nd: Multiplie backward data with clipped grad + diffStepSize += ((qData_1*grad_output[4*i] + qData_2*grad_output[4*i+1])+(qData_3*grad_output[4*i+2] + qData_4*grad_output[4*i+3])); + } + + // Process remaining + for(unsigned int i=inputLength-inputLength%4; i<inputLength; ++i) { + const GI fullPrecScale = input[i] / stepSize[0]; + grad_input[i] = grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + GI qData = fullPrecScale; + qData = ((qData <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData >= static_cast<GI>(range.second)) ? 
static_cast<GI>(range.second) : + round(qData) - qData); + diffStepSize += qData*grad_output[i]; + } + + const GI gradScaleFactor = static_cast<GI>(1.0f / std::sqrt(inputLength * range.second)); + // 3rd: Multiply Step Size gradient with scale factor + grad_stepSize[0] = diffStepSize * gradScaleFactor; +} + + +// Kernels registration to implementation entry point +REGISTRAR(LSQImpl_cpu, + {{DataType::Float16, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::LSQImpl_cpu_forward_kernel<half_float::half, half_float::half>, Aidge::LSQImpl_cpu_backward_kernel<half_float::half, half_float::half, half_float::half>}); +REGISTRAR(LSQImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::LSQImpl_cpu_forward_kernel<float, float>, Aidge::LSQImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(LSQImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::LSQImpl_cpu_forward_kernel<double, double>, Aidge::LSQImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LSQIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..36bf6f9afffa926dc91814c654aec9515294c297 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp @@ -0,0 +1,37 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_DOREFAIMPL_H_ +#define AIDGE_CPU_OPERATOR_DOREFAIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/SAT/DoReFa.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +// Operator implementation entry point for the backend +using DoReFaImpl_cpu = OperatorImpl_cpu<DoReFa_Op, + void(const std::size_t, float, DoReFaMode, const void*, void*), + void(const std::size_t, float, DoReFaMode, const void*, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(DoReFa_Op, "cpu", Aidge::DoReFaImpl_cpu::create); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_DOREFAIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6c30947ae89dee7b75f8b53e2a24752d6b5d8ab5 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp @@ -0,0 +1,75 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_DOREFAIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_DOREFAIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp" + +namespace Aidge { + +template <class I, class O> +void DoReFaImpl_cpu_forward_kernel(std::size_t inputLength, + float range, + DoReFaMode mode, + const void* input_, + void* output_) +{ + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + // Dorefa Quantization + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + if (mode == DoReFaMode::Default) { + auto q = I(0.5) * (input[i] + I(1.0)); + q = O(1.0f / range) * O(std::rintf(q * range)); + output[i] = q * O(2.0) - O(1.0); + } + else { + output[i] = O(1.0f / range) * O(std::rintf(input[i] * range)); + } + } +} + +template <class I, class GI, class GO> +void DoReFaImpl_cpu_backward_kernel(const std::size_t inputLength, + float /*range*/, + DoReFaMode /*mode*/, + const void* /*input_*/, + const void* grad_output_, + void* grad_input_) +{ + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + grad_input[i] = grad_output[i]; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(DoReFaImpl_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::DoReFaImpl_cpu_forward_kernel<int, int>, Aidge::DoReFaImpl_cpu_backward_kernel<int, int, int>}); +REGISTRAR(DoReFaImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::DoReFaImpl_cpu_forward_kernel<float, float>, 
Aidge::DoReFaImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(DoReFaImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::DoReFaImpl_cpu_forward_kernel<double, double>, Aidge::DoReFaImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_DOREFAIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..972ed411f2bdb85dc88c5209f3c6dc947c5401bf --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp @@ -0,0 +1,35 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_H_ +#define AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/SAT/TanhClamp.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using TanhClampImpl_cpu = OperatorImpl_cpu<TanhClamp_Op, + void(const std::size_t, const void*, void*, void*), + void(const std::size_t, const void*, const void*, void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(TanhClamp_Op, "cpu", Aidge::TanhClampImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5c9602de67c48a661d57aef4c8c381199bae2c29 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp @@ -0,0 +1,84 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_KERNELS_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Kernels +template <class I, class O> +void TanhClampImpl_cpu_forward_kernel(std::size_t inputLength, + const void* input_, + void* scaling_, + void* output_) +{ + const I* input = static_cast<const I*>(input_); + I scaling = *static_cast<I*>(scaling_); + O* output = static_cast<O*>(output_); + + const auto minMax = std::minmax_element(input, input + inputLength); + const auto absMax = std::max(std::abs(*(minMax.first)), std::abs(*(minMax.second))); + scaling = std::tanh(absMax); + + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + output[i] = std::tanh(input[i]) / scaling; + } + + // Set the scaling output ... 
+ *(static_cast<I*> (scaling_)) = scaling; +} + +template <class I, class GI, class GO> +void TanhClampImpl_cpu_backward_kernel(const std::size_t inputLength, + const void* input_, + const void* scaling_, + const void* grad_output_, + void* grad_input_) +{ + const I* input = static_cast<const I*>(input_); + const I scaling = *static_cast<const I*>(scaling_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + const auto inv_cosh = GO(1 / std::cosh(input[i])); + const auto grad = inv_cosh * inv_cosh * GO(1 / scaling); + grad_input[i] = grad_output[i] * grad; + } +} + + +// Kernels registration to implementation entry point +REGISTRAR(TanhClampImpl_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::TanhClampImpl_cpu_forward_kernel<int, int>, Aidge::TanhClampImpl_cpu_backward_kernel<int, int, int>}); +REGISTRAR(TanhClampImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::TanhClampImpl_cpu_forward_kernel<float, float>, Aidge::TanhClampImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(TanhClampImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::TanhClampImpl_cpu_forward_kernel<double, double>, Aidge::TanhClampImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_KERNELS_H_ */ diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..96a52b4592bc05f34a47e04e664df27847a48e85 --- /dev/null +++ b/include/aidge/operator/FixedQ.hpp @@ -0,0 +1,101 @@ 
+/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_ +#define AIDGE_CORE_OPERATOR_FIXEDQ_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/utils/StaticAttributes.hpp" + + +namespace Aidge { + +enum class FixedQAttr { NbBits, Span, IsOutputUnsigned }; + +class FixedQ_Op : public OperatorTensor, + public Registrable<FixedQ_Op, std::string, + std::function<std::shared_ptr<OperatorImpl>(const FixedQ_Op&)>> { + +public: + static const std::string Type; + +private: + using Attributes_ = StaticAttributes<FixedQAttr, std::size_t, float, bool>; + template <FixedQAttr e> using attr = typename Attributes_::template attr<e>; + const std::shared_ptr<Attributes_> mAttributes; + +public: + + FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) : + OperatorTensor(Type, {InputCategory::Data}, 1), + mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. 
+ */ + FixedQ_Op(const FixedQ_Op& op) + : OperatorTensor(op), mAttributes(op.mAttributes) + { + if (op.mImpl){ + SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. + * @see Operator::FixedQ_Op + */ + std::shared_ptr<Operator> clone() const override { + return std::make_shared<FixedQ_Op>(*this); + } + std::set<std::string> getAvailableBackends() const override final; + void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + + inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } + inline std::size_t& nbBits() const noexcept { return mAttributes->getAttr<FixedQAttr::NbBits>(); } + inline float& span() const noexcept { return mAttributes->getAttr<FixedQAttr::Span>(); } + inline bool& isOutputUnsigned() const noexcept { return mAttributes->getAttr<FixedQAttr::IsOutputUnsigned>(); } + + + static const std::vector<std::string> getInputsName(){ + return {"data_input"}; + } + static const std::vector<std::string> getOutputsName(){ + return {"data_output"}; + } + + +}; + +inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") { + return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); +} +} + +namespace { +template <> +const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"}; +} + +#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */ diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7ce23f01ea94ccfa3ffc7ec6b49a66e8c3048756 --- /dev/null +++ b/include/aidge/operator/LSQ.hpp @@ -0,0 +1,107 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made 
available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_LSQ_H_ +#define AIDGE_CORE_OPERATOR_LSQ_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +enum class LSQAttr { Range }; + +/** + * LSQ is the weights AND activations quantizer for the LSQ method. + */ +class LSQ_Op : public OperatorTensor, + public Registrable<LSQ_Op, std::string, std::function<std::shared_ptr<OperatorImpl>(const LSQ_Op &)>> { +public: + static const std::string Type; + +private: + using Attributes_ = StaticAttributes<LSQAttr, std::pair<int, int>>; + template <LSQAttr e> using attr = typename Attributes_::template attr<e>; + const std::shared_ptr<Attributes_> mAttributes; + +public: + LSQ_Op(const std::pair<int, int>& range = {0, 255}) + : OperatorTensor(Type, {InputCategory::Data, InputCategory::Param}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<LSQAttr::Range>(range))) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. + */ + LSQ_Op(const LSQ_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) + { + if (op.mImpl){ + SET_IMPL_MACRO(LSQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. 
+ * @see Operator::LSQ_Op + */ + std::shared_ptr<Operator> clone() const override { + return std::make_shared<LSQ_Op>(*this); + } + + bool forwardDims(bool allowDataDependency = false) override final; + std::set<std::string> getAvailableBackends() const override final; + void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + + inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } + inline std::pair<int, int>& range() const noexcept { return mAttributes->getAttr<LSQAttr::Range>(); } + + static const std::vector<std::string> getInputsName(){ + return {"data_input", "step_size"}; + } + static const std::vector<std::string> getOutputsName(){ + return {"data_output"}; + } +}; + +/** + * Range should be (with N the number of bits): + * - {0, 2^N - 1} in place of ReLU activations + * - {-2^(N-1), 2^(N-1) - 1} in for weights quantization + */ +inline std::shared_ptr<Node> LSQ(const std::pair<int, int>& range = {0, 255}, const std::string& name = "") { + auto lsq = std::make_shared<Node>(std::make_shared<LSQ_Op>(range), name); + addProducer(lsq, 1, {1}, "ss"); + return lsq; +} +} + +namespace { +template <> +const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"}; +} + +#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */ diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp new file mode 100644 index 0000000000000000000000000000000000000000..92ce1677b1b28e303c8488b55dd00cfafb519457 --- /dev/null +++ b/include/aidge/operator/SAT/DoReFa.hpp @@ -0,0 +1,109 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_ +#define AIDGE_CORE_OPERATOR_DOREFA_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +enum class DoReFaAttr { Range, Mode }; +enum class DoReFaMode { + Default, // Original SAT paper (not including 0) + Symmetric, // Symmetric range including 0 + //Asymmetric, + //FullRange +}; + +/** + * DoReFa is the weights quantizer for the 2nd training phase (quantization) of the SAT method. + */ +class DoReFa_Op : public OperatorTensor, + public Registrable<DoReFa_Op, std::string, std::function<std::shared_ptr<OperatorImpl>(const DoReFa_Op&)>> { +public: + static const std::string Type; + +private: + using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>; + template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>; + const std::shared_ptr<Attributes_> mAttributes; + +public: + DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default) + : OperatorTensor(Type, {InputCategory::Param}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<DoReFaAttr::Range>(range), + attr<DoReFaAttr::Mode>(mode))) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. + */ + DoReFa_Op(const DoReFa_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) + { + if (op.mImpl){ + SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. 
+ * @see Operator::DoReFa_Op + */ + std::shared_ptr<Operator> clone() const override { + return std::make_shared<DoReFa_Op>(*this); + } + + std::set<std::string> getAvailableBackends() const override final; + void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + + inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } + inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } + inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); } + + static const std::vector<std::string> getInputsName(){ + return {"data_input"}; + } + static const std::vector<std::string> getOutputsName(){ + return {"data_output"}; + } +}; + +inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") { + return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); +} +} + +namespace { +template <> +const char *const EnumStrings<Aidge::DoReFaAttr>::data[] = {"range", "mode"}; + +template <> +const char *const EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"}; +} + +#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */ diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..def43b872c021e539efe5658b592ceec9b3b5d4d --- /dev/null +++ b/include/aidge/operator/SAT/TanhClamp.hpp @@ -0,0 +1,83 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_ +#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +/** + * TanhClamp is the weights clamping for the 1st training phase (clamping) of the SAT method. + */ +class TanhClamp_Op : public OperatorTensor, + public Registrable<TanhClamp_Op, std::string, std::function<std::shared_ptr<OperatorImpl>(const TanhClamp_Op&)>> { + +public: + static const std::string Type; + + TanhClamp_Op() + : OperatorTensor(Type, {InputCategory::Data}, 2) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. + */ + TanhClamp_Op(const TanhClamp_Op& op) + : OperatorTensor(op) + { + if (op.mImpl){ + SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. 
+ * @see Operator::TanhClamp_Op + */ + std::shared_ptr<Operator> clone() const override { + return std::make_shared<TanhClamp_Op>(*this); + } + + bool forwardDims(bool allowDataDependency = false) override final; + std::set<std::string> getAvailableBackends() const override final; + + void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + + static const std::vector<std::string> getInputsName(){ + return {"data_input"}; + } + static const std::vector<std::string> getOutputsName(){ + return {"data_output", "scaling"}; + } +}; + +inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") { + return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); +} +} + +#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index d24831c3460aac037091636450a7290c47e1775b..b3e25d5c405368b8ebcbd5220831da7e7d3446d1 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -26,12 +26,12 @@ namespace Aidge { /** * @brief Set of the types of the nodes which contain affine transforms (that is Y = A.X + B) */ - static const std::set<std::string> affineNodeTypes({"FC", "Conv", "ConvDepthWise", "PaddedConv", "PaddedConvDepthWise"}); + static const std::set<std::string> affineNodeTypes({"FC", "Conv2D", "ConvDepthWise2D", "PaddedConv2D", "PaddedConvDepthWise2D"}); /** * @brief Set of the types of the nodes which does not affect the PTQ process */ - static const std::set<std::string> seamlessNodeTypes({"Pad", "MaxPooling", "AvgPooling", "PaddedMaxPooling", "PaddedAvgPooling", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); + static const std::set<std::string> seamlessNodeTypes({"Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); /** * @brief Set of the types of the nodes that merge multiple branches into one @@ -74,6 
+74,10 @@ namespace Aidge { */ bool checkArchitecture(std::shared_ptr<GraphView> graphView); + + void prepareNetwork(std::shared_ptr<GraphView> graphView); + + /** * @brief Insert a scaling node after each affine node of the GraphView. * Also insert a scaling node in every purely residual branches. @@ -154,7 +158,6 @@ namespace Aidge { * @param graphView The GraphView under test. */ void devPTQ(std::shared_ptr<GraphView> graphView); - } #endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ecbe7422ea85db1771d91e161c93740993ebbe2b --- /dev/null +++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ + +#include "aidge/graph/Node.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/data/Tensor.hpp" + +namespace Aidge { +namespace QuantFixedQ { + +/** + * @brief Insert the FixedQ quantizer nodes in a given GraphView + * @param graphView The GraphView containing the graph to quantize. + * @param nbBits Number of quantization bits. + * @param span Fixed output span of the quantizers. + */ +void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float span); + +/** + * @brief Given a GraphView with parameters properly initialized and some calibration data, + * insert the FixedQ quantizer nodes, and adjust their output spans. 
+ * @param graphView The GraphView containing the graph to quantize. + * @param nbBits Number of quantization bits. + * @param calibrationData Calibration data used to adjust the spans. + * @param scale Multiplicative constant applied to the spans. + */ +void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData, float scale); + +/** + * @brief Developement and test routine. + * @param graphView The GraphView under test. + */ +void devQAT(std::shared_ptr<GraphView> graphView); + +} +} + +#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */ + diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4970be07fae8737a1c2863600757bb81ff3a65f9 --- /dev/null +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -0,0 +1,44 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_ +#define AIDGE_QUANTIZATION_QAT_LSQ_H_ + +#include "aidge/graph/Node.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/data/Tensor.hpp" + +namespace Aidge { +namespace QuantLSQ { + +/** + * @brief Insert the LSQ quantizer nodes in a given GraphView + * @param graphView The GraphView containing the graph to quantize. + * @param nbBits Number of quantization bits. + * @param span Fixed output span of the quantizers. 
+ */ +void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); + +/** + * @brief Given a GraphView with parameters properly initialized and some calibration data, + * insert the LSQ quantizer nodes, and adjust their step-sizes. + * @param graphView The GraphView containing the graph to quantize. + * @param nbBits Number of quantization bits. + * @param calibrationData Calibration data used to adjust the spans. + * @param scale Multiplicative constant applied to the spans. + */ +void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); + +} +} + +#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */ + diff --git a/python_binding/operator/SAT/pybind_DoReFa.cpp b/python_binding/operator/SAT/pybind_DoReFa.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e66665f71cf22ee48dd814692e7645c515e4dc1 --- /dev/null +++ b/python_binding/operator/SAT/pybind_DoReFa.cpp @@ -0,0 +1,34 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/DoReFa.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_DoReFa(py::module& m) { + py::enum_<DoReFaMode>(m, "DoReFaMode") + .value("Default", DoReFaMode::Default) + .value("Symmetric", DoReFaMode::Symmetric) + .export_values(); + + py::class_<DoReFa_Op, std::shared_ptr<DoReFa_Op>, OperatorTensor>(m, "DoReFaOp", py::multiple_inheritance()) + .def(py::init<size_t, DoReFaMode>(), py::arg("range") = 255, py::arg("mode") = DoReFaMode::Default) + .def_static("get_inputs_name", &DoReFa_Op::getInputsName) + .def_static("get_outputs_name", &DoReFa_Op::getOutputsName); + declare_registrable<DoReFa_Op>(m, "DoReFaOp"); + m.def("DoReFa", &DoReFa, py::arg("range") = 255, py::arg("mode") = DoReFaMode::Default, py::arg("name") = ""); +} +} // namespace Aidge diff --git a/python_binding/operator/SAT/pybind_TanhClamp.cpp b/python_binding/operator/SAT/pybind_TanhClamp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ed019b20e73bf916cf26da5a443fb23c41370d09 --- /dev/null +++ b/python_binding/operator/SAT/pybind_TanhClamp.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/TanhClamp.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_TanhClamp(py::module& m) { + py::class_<TanhClamp_Op, std::shared_ptr<TanhClamp_Op>, OperatorTensor>(m, "TanhClampOp", py::multiple_inheritance()) + .def(py::init<>()) + .def_static("get_inputs_name", &TanhClamp_Op::getInputsName) + .def_static("get_outputs_name", &TanhClamp_Op::getOutputsName); + declare_registrable<TanhClamp_Op>(m, "TanhClampOp"); + m.def("TanhClamp", &TanhClamp, py::arg("name") = ""); +} +} // namespace Aidge diff --git a/python_binding/operator/pybind_FixedQ.cpp b/python_binding/operator/pybind_FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a85fde261e3b3651446e031abb15518a960fac68 --- /dev/null +++ b/python_binding/operator/pybind_FixedQ.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/FixedQ.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_FixedQ(py::module& m) { + py::class_<FixedQ_Op, std::shared_ptr<FixedQ_Op>, OperatorTensor>(m, "FixedQOp", py::multiple_inheritance()) + .def(py::init<std::size_t, float, bool>(), py::arg("nb_bits"), py::arg("span"), py::arg("is_output_unsigned")) + .def_static("get_inputs_name", &FixedQ_Op::getInputsName) + .def_static("get_outputs_name", &FixedQ_Op::getOutputsName); + declare_registrable<FixedQ_Op>(m, "FixedQOp"); + m.def("FixedQ", &FixedQ, py::arg("nb_bits") = 8, py::arg("span") = 4.0f, py::arg("is_output_unsigned") = false, py::arg("name") = ""); +} +} // namespace Aidge \ No newline at end of file diff --git a/python_binding/operator/pybind_LSQ.cpp b/python_binding/operator/pybind_LSQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f5fe467c484067caed951aad873196c7fc8eef4 --- /dev/null +++ b/python_binding/operator/pybind_LSQ.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/LSQ.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_LSQ(py::module& m) { + py::class_<LSQ_Op, std::shared_ptr<LSQ_Op>, OperatorTensor>(m, "LSQOp", py::multiple_inheritance()) + .def(py::init<const std::pair<int, int>&>(), py::arg("range") = std::pair<int, int>{0, 255}) + .def_static("get_inputs_name", &LSQ_Op::getInputsName) + .def_static("get_outputs_name", &LSQ_Op::getOutputsName); + declare_registrable<LSQ_Op>(m, "LSQOp"); + m.def("LSQ", &LSQ, py::arg("range") = std::pair<int, int>{0, 255}, py::arg("name") = ""); +} +} // namespace Aidge diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index ef6e5315851578b20341f088a2b4aed62ab431ed..0475b3f2f98534282a68b1c41e46e9c09b9c7701 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -24,7 +24,7 @@ namespace py = pybind11; namespace Aidge { -void init_QuantPTQ(py::module &m) { +void init_PTQ(py::module &m) { py::enum_<Clipping>(m, "Clipping", "Kind of clipping policy to apply during the activation quantization") .value("MAX", Clipping::MAX) @@ -79,7 +79,7 @@ void init_QuantPTQ(py::module &m) { :type value_ranges: list of float. )mydelimiter"); - m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("apply_rounding"), py::arg("optimize_signs"), py::arg("verbose"), + m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("apply_rounding"), py::arg("optimize_signs"), py::arg("verbose") = false, R"mydelimiter( Quantize an already normalized (in term of parameters and activations) network. :param network: The GraphView to be quantized. 
@@ -94,7 +94,7 @@ void init_QuantPTQ(py::module &m) { :type verbose: bool )mydelimiter"); m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX, py::arg("apply_rounding") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("verbose") = false, R"mydelimiter( Main quantization routine. Performs every step of the quantization pipeline. :param network: The GraphView to be quantized. @@ -154,7 +154,7 @@ void init_QuantPTQ(py::module &m) { :rtype: float )mydelimiter"); - m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("input_dataset"), py::arg("verbose"), + m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("input_dataset"), py::arg("verbose") = false, R"mydelimiter( Return a corrected map of the provided activation ranges. To do so compute the optimal clipping values for every node and multiply the input ranges by those values. @@ -176,7 +176,7 @@ void init_QuantPTQ(py::module &m) { )mydelimiter"); - m.def("compute_sign_map", &computeSignMap, py::arg("network"), py::arg("verbose"), + m.def("compute_sign_map", &computeSignMap, py::arg("network"), py::arg("verbose") = false, R"mydelimiter( For each node, compute the sign of its input and output values. The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible. 
@@ -220,10 +224,9 @@ void init_QuantPTQ(py::module &m) { :param network: The GraphView under test. :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); -} -PYBIND11_MODULE(aidge_quantization, m) { - init_QuantPTQ(m); + m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ"); + } } // namespace Aidge diff --git a/python_binding/pybind_QAT_FixedQ.cpp b/python_binding/pybind_QAT_FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b247b6f52ef1c357131ef1e1803f57bda7bbc5de --- /dev/null +++ b/python_binding/pybind_QAT_FixedQ.cpp @@ -0,0 +1,33 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "aidge/quantization/QAT/QAT_FixedQ.hpp" +#include "aidge/hook/Hook.hpp" +#include "aidge/graph/GraphView.hpp" + +namespace py = pybind11; + +namespace Aidge { + +void init_QAT_FixedQ(py::module &m) { + + auto mQuantFixedQ = m.def_submodule("fixedq"); + + mQuantFixedQ.def("insert_quantizers", &QuantFixedQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("span")); + + mQuantFixedQ.def("insert_and_init_quantizers", &QuantFixedQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"), py::arg("scale")); + + mQuantFixedQ.def("dev_qat", &QuantFixedQ::devQAT, py::arg("network")); +} +} // namespace Aidge diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6b392da633ae16ad668072360628f54aa3997f07 --- /dev/null 
+++ b/python_binding/pybind_QAT_LSQ.cpp @@ -0,0 +1,31 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "aidge/quantization/QAT/QAT_LSQ.hpp" +#include "aidge/hook/Hook.hpp" +#include "aidge/graph/GraphView.hpp" + +namespace py = pybind11; + +namespace Aidge { + +void init_QAT_LSQ(py::module &m) { + + auto mQuantLSQ = m.def_submodule("lsq"); + + mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size")); + + mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); +} +} // namespace Aidge diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f998d8c242aa0d8f3215b1577ca4cfeba73fa265 --- /dev/null +++ b/python_binding/pybind_Quantization.cpp @@ -0,0 +1,48 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "aidge/backend/QuantizationCPU.hpp" +//#include "aidge/backend/QuantizationCUDA.hpp" + +namespace py = pybind11; + +namespace Aidge +{ + +// operators +void init_FixedQ(py::module& m); +void init_LSQ(py::module& m); +void init_TanhClamp(py::module& m); +void init_DoReFa(py::module& m); + + +// quantization routines +void init_PTQ(py::module &m); +void init_QAT_FixedQ(py::module &m); +void init_QAT_LSQ(py::module &m); + + +PYBIND11_MODULE(aidge_quantization, m) +{ + init_FixedQ(m); + init_LSQ(m); + init_TanhClamp(m); + init_DoReFa(m); + + init_PTQ(m); + init_QAT_FixedQ(m); + init_QAT_LSQ(m); +} + +} // namespace Aidge diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index df9b31edc614a8d220747d36832aff710f663bff..45b34fbad408a81785baefe578cdfac5f49a9627 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -84,11 +84,11 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe do { maxRangeDelta = 0.0; - /* - std::cout << " ----- " << std::endl; - for (std::shared_ptr<Node> node : affineNodeVector) - std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - */ + + //std::cout << " ----- " << std::endl; + //for (std::shared_ptr<Node> node : affineNodeVector) + // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; + for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 96dd9809656b5faee9c822eb3905a262e8107f68..05a5b8cbac4433dc7c44ce2c9ad3f31b75c67b1d 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -69,7 +69,7 @@ bool isMerging(std::shared_ptr<Node> node) bool checkArchitecture(std::shared_ptr<GraphView> graphView) { - std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "ReLU", "Producer"}); + 
std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); for (std::shared_ptr<Node> node : graphView->getNodes()) { @@ -128,8 +128,7 @@ static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) return maxValue; } -static void removeMatchingNodes(std::vector<std::shared_ptr<Node>>& nodeVector, std::string nodeType) -{ +/* std::vector<std::shared_ptr<Node>>::iterator iter = nodeVector.begin(); while (iter != nodeVector.end()) { @@ -138,6 +137,17 @@ static void removeMatchingNodes(std::vector<std::shared_ptr<Node>>& nodeVector, else ++iter; } +*/ + +// TODO : pass nodeVector by reference ... +static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType) +{ + std::vector<std::shared_ptr<Node>> remainingNodes; + for (std::shared_ptr<Node> node : nodeVector) + if (node->type() != nodeType) + remainingNodes.push_back(node); + + return remainingNodes; } static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) { @@ -181,10 +191,14 @@ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> nodeVector = scheduler.getStaticScheduling(); + //std::cout << " RNV : NB OF NODES = " << nodeVector.size() << std::endl; + //for (auto node : nodeVector) + // std::cout << node->type() << std::endl; + fixScheduling(nodeVector); - removeMatchingNodes(nodeVector, "Producer"); + nodeVector = removeMatchingNodes(nodeVector, "Producer"); - if (verbose) + if (verbose) { Log::info("NB OF NODES = {}", nodeVector.size()); for (std::shared_ptr<Node> node : nodeVector) @@ -216,7 +230,7 @@ static void popSoftMax(std::shared_ptr<GraphView> graphView) } } -static void prepareNetwork(std::shared_ptr<GraphView> graphView) +void prepareNetwork(std::shared_ptr<GraphView> graphView) { removeFlatten(graphView); @@ -339,6 +353,18 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergin return currNode; } +// XXX double check 
this ! +static bool nodeHasBias(std::shared_ptr<Node> node) +{ + if (node->getParents().size() == 3) + { + std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); + if (biasTensor) + return true; + } + return false; +} + void normalizeParameters(std::shared_ptr<GraphView> graphView) { // CREATE THE ACCUMULATED RATIO MAP /////////////////////////////////////// @@ -387,15 +413,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) accumulatedRatios[node->name()] = accumulatedRatios[prevNode->name()] * ratio; } - // Handle the bias ... - bool nodeHasBias = (node->getParents().size() == 3); - if (nodeHasBias) + // Handle the bias .. + + if (nodeHasBias(node)) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - - // Check that a bias is present (as it is optional) - if (biasTensor) - rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); } } @@ -569,8 +592,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st if (isAffine(prevNode)) { - bool prevNodeHasBias = (prevNode->getParents().size() == 3); - if (prevNodeHasBias) { + bool prevNodeHasBias = nodeHasBias(prevNode); + if (prevNodeHasBias) + { std::shared_ptr<Tensor> biasTensor = getBiasTensor(prevNode); rescaleTensor(biasTensor, 1.0 / prevScalingFactor); } @@ -722,14 +746,14 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // SANITY CHECK (TEMPORARY) for (std::shared_ptr<Node> node : nodeVector) - if (node != firstNode) + { + for (std::shared_ptr<Node> child : node->getChildren()) { - for (std::shared_ptr<Node> parent : node->getParents()) - if (parent->type() != "Producer") - if (signMap[parent->name()].second != signMap[node->name()].first) - Log::error(" computeSignMap : link is not sane ! ({} -> {})", parent->name(), node->name()); + if (signMap[node->name()].second != signMap[child->name()].first) + Log::error(" computeSignMap : link is not sane ! 
({} -> {})", node->name(), child->name()); } - + } + return signMap; } @@ -769,8 +793,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Rescale the bias tensor - bool nodeHasBias = (node->getParents().size() == 3); - if (nodeHasBias) + if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; float rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; @@ -919,8 +942,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool if (applyRounding) roundTensor(weightTensor); - bool nodeHasBias = (node->getParents().size() == 3); - if (nodeHasBias) + if (nodeHasBias(node)) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); rescaleTensor(biasTensor, ratio); diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bfc88dc6c17c560a037eb71d2b0f13eafe7ef052 --- /dev/null +++ b/src/QAT/QAT_FixedQ.cpp @@ -0,0 +1,162 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/quantization/QAT/QAT_FixedQ.hpp" +#include "aidge/operator/FixedQ.hpp" + +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/graph/Matching.hpp" + +namespace Aidge { + +void QuantFixedQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float span) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // INPUT QUANTIZERS INSERTION + + auto inputQuantizerName = linearNode->name() + "_fixedq_i"; // TODO : double check this, and use createUniqueName() + auto inputQuantizerNode = FixedQ(nbBits, span, false, inputQuantizerName); + + // Absorb the ReLU when possible ... + + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ??? + + if (nodeHasParent) { + auto parentNode = linearNode->getParents()[0]; + if (parentNode->type() == "ReLU") { + auto inputQuantizerOp = std::static_pointer_cast<FixedQ_Op> (inputQuantizerNode->getOperator()); + inputQuantizerOp->isOutputUnsigned() = true; + graphView->replace({parentNode}, {}); + } + } + + // We need to handle the case where the linear node is the first one ... 
+ + if (nodeHasParent) { + graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + } else { + inputQuantizerNode->addChild(graphView); + graphView->add(inputQuantizerNode); + } + + // PARAM QUANTIZERS INSERTION + + auto paramQuantizerName = linearNode->name() + "_fixedq_p"; // TODO : double check this, and use createUniqueName() + auto paramQuantizerNode = FixedQ(nbBits, span, false, paramQuantizerName); + graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); + } +} + +static float getTensorStd(std::shared_ptr<Tensor> tensor) +{ + float acc = 0; + float * castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); + for(std::size_t i = 0; i < tensor->size(); i++) + acc += castedTensor[i] * castedTensor[i]; + acc /= static_cast<float> (tensor->size()); + return std::sqrt(acc); +} + +static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData) +{ + // Propagate the calibration tensor + + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.forward(true, {calibrationData}); + + // Store the input tensor statistics + + std::map<std::string, float> inputStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! + { + const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); + float inputStd = getTensorStd(op->getInput(0)); + inputStats.insert(std::make_pair(node->name(), inputStd)); + std::cout << node->name() << " -> " << inputStd << std::endl; + } + } + + return inputStats; +} + +static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView) +{ + std::map<std::string, float> paramStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! 
+ { + const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); + float paramStd = getTensorStd(op->getInput(1)); + paramStats.insert(std::make_pair(node->name(), paramStd)); + std::cout << node->name() << " -> " << paramStd << std::endl; + } + } + + return paramStats; +} + +static void adjustQuantizersSpans(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats, float scale = 4.0f) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // Adjust the input quantizers spans + + auto inputQuantNode = linearNode->getParent(0); + auto inputQuantOp = std::static_pointer_cast<FixedQ_Op>(inputQuantNode->getOperator()); + inputQuantOp->span() = inputStats[linearNode->name()] * scale; + + // Adjust the param quantizers spans + + auto paramQuantNode = linearNode->getParent(1); + auto paramQuantOp = std::static_pointer_cast<FixedQ_Op>(paramQuantNode->getOperator()); + paramQuantOp->span() = paramStats[linearNode->name()] * scale; + } +} + +void QuantFixedQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData, float scale) +{ + // Collect the tensor statistics + auto inputStats = collectInputStats(graphView, calibrationData); + auto paramStats = collectParamStats(graphView); + + // Insert the quantizers + insertQuantizers(graphView, nbBits, 1.0); + + // Adjust the quantizers spans + adjustQuantizersSpans(graphView, inputStats, paramStats, scale); +} + +void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) +{ + SequentialScheduler scheduler(graphView); + scheduler.generateScheduling(); + auto s = scheduler.getStaticScheduling(); + for (std::shared_ptr<Node> node : s) + std::cout << " name : " << node->name() << std::endl; +} + +} \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
new file mode 100644 index 0000000000000000000000000000000000000000..1f18ae93aac2f620ed32ca8bc42fa82850246e0a --- /dev/null +++ b/src/QAT/QAT_LSQ.cpp @@ -0,0 +1,177 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/quantization/QAT/QAT_LSQ.hpp" +#include "aidge/operator/LSQ.hpp" + +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/graph/Matching.hpp" + +namespace Aidge { + +void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; + + // INPUT QUANTIZERS INSERTION + + auto inputQuantizerName = linearNode->name() + "_lsq_i"; // TODO : double check this, and use createUniqueName() + auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + + // Set the step size + + auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); + inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + + // Absorb the ReLU when possible ... + + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ??? 
+ + if (nodeHasParent) { + auto parentNode = linearNode->getParents()[0]; + if (parentNode->type() == "ReLU") { + auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); + inputQuantizerOp->range() = unsignedRange; + graphView->replace({parentNode}, {}); + } + } + + // We need to handle the case where the linear node is the first one ... + + if (nodeHasParent) { + graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + } else { + inputQuantizerNode->addChild(graphView); + graphView->add(inputQuantizerNode); + } + + // PARAM QUANTIZERS INSERTION + + auto paramQuantizerName = linearNode->name() + "_lsq_p"; // TODO : double check this, and use createUniqueName() + auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); + graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); + + // Set the step size + + auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); + paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + + } +} + +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + float acc = 0; + float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); + for(std::size_t i = 0; i < tensor->size(); i++) + acc += std::abs(castedTensor[i]); + acc /= static_cast<float> (tensor->size()); + return acc; +} + +static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData) +{ + // Propagate the calibration tensor + + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.forward(true, {calibrationData}); + + // Store the input tensor statistics + + std::map<std::string, float> inputStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! 
+ { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float inputAbsMean = getTensorAbsMean(op->getInput(0)); + inputStats.insert(std::make_pair(node->name(), inputAbsMean)); + std::cout << node->name() << " -> " << inputAbsMean << std::endl; + } + } + + return inputStats; +} + +static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView) +{ + std::map<std::string, float> paramStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! + { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float paramAbsMean = getTensorAbsMean(op->getInput(1)); + paramStats.insert(std::make_pair(node->name(), paramAbsMean)); + std::cout << node->name() << " -> " << paramAbsMean << std::endl; + } + } + + return paramStats; +} + +static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // INPUT QUANTIZERS STEP-SIZES + + auto inputQuantNode = linearNode->getParent(0); + auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); + + float absMean = inputStats[linearNode->name()]; + float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); + + auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); + inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + + // PARAM QUANTIZERS STEP-SIZES + + auto paramQuantNode = linearNode->getParent(1); + auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); + + absMean = paramStats[linearNode->name()]; + stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); + + auto paramStepSizeOp = 
paramQuantNode->getParent(1)->getOperator(); + paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + } +} + +void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +{ + // Collect the tensor statistics + auto inputStats = collectInputStats(graphView, calibrationData); + auto paramStats = collectParamStats(graphView); + + // Insert the quantizers + insertQuantizers(graphView, nbBits, 1.0); + + // Adjust the quantizers step-sizes + adjustQuantizersStepSizes(graphView, inputStats, paramStats); +} + +} \ No newline at end of file diff --git a/src/backend/cpu/operator/FixedQImpl.cpp b/src/backend/cpu/operator/FixedQImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3ea44adaf9987be6e873ce4f067d2b6e9c7b1674 --- /dev/null +++ b/src/backend/cpu/operator/FixedQImpl.cpp @@ -0,0 +1,76 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0.
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/FixedQ.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/FixedQImpl.hpp" +#include "aidge/backend/cpu/operator/FixedQImpl_kernels.hpp" + +// template<> +// Aidge::Elts_t Aidge::FixedQImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) { +// // this implementation can be in-place +// return Elts_t::DataElts(0); +// } + +template<> +void Aidge::FixedQImpl_cpu::forward() +{ + const FixedQ_Op& op_ = dynamic_cast<const FixedQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + AIDGE_ASSERT(in0, "missing input #0"); + + // Find the correct kernel type + const auto impl = Registrar<FixedQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward( + op_.nbBits(), + op_.span(), + op_.isOutputUnsigned(), + in0->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0)) + ); +} + + +template<> +void Aidge::FixedQImpl_cpu::backward() +{ + const FixedQ_Op& op_ = dynamic_cast<const FixedQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + + // Find the correct kernel type + const auto impl = Registrar<FixedQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + op_.nbBits(), + op_.span(), + op_.isOutputUnsigned(), + gra_int0->size(), + getCPUPtr(in0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0) + ); +} diff --git 
a/src/backend/cpu/operator/LSQImpl.cpp b/src/backend/cpu/operator/LSQImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..32b494376d9e4e8ea2923fe7770ffddb821eeb11 --- /dev/null +++ b/src/backend/cpu/operator/LSQImpl.cpp @@ -0,0 +1,63 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/LSQ.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/LSQImpl.hpp" +#include "aidge/backend/cpu/operator/LSQImpl_kernels.hpp" + +template<> +void Aidge::LSQImpl_cpu::forward() { + const LSQ_Op& op_ = dynamic_cast<const LSQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> in1 = op_.getInput(1); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + + // Find the correct kernel type + auto impl = Registrar<LSQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(in0->size(), + op_.range(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0)); +} +template<> +void Aidge::LSQImpl_cpu::backward() { + const LSQ_Op& op_ = dynamic_cast<const LSQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> in1 = op_.getInput(1); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_int1 = op_.getInput(1)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + + // Find the correct 
kernel type + auto impl = Registrar<LSQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + gra_int0->size(), + op_.range(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0), + getCPUPtr(gra_int1)); +} diff --git a/src/backend/cpu/operator/SAT/DoReFaImpl.cpp b/src/backend/cpu/operator/SAT/DoReFaImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..58aec791f575b50b46aeea956bfc50def18a6ce4 --- /dev/null +++ b/src/backend/cpu/operator/SAT/DoReFaImpl.cpp @@ -0,0 +1,65 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/DoReFa.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp" + +template<> +void Aidge::DoReFaImpl_cpu::forward() { + + const DoReFa_Op& op_ = dynamic_cast<const DoReFa_Op&>(mOp); + + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + + // Find the correct kernel type + auto impl = Registrar<DoReFaImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward( + in0->size(), + op_.range(), + op_.mode(), + getCPUPtr(in0), + getCPUPtr(out0)); +} + +template<> +void Aidge::DoReFaImpl_cpu::backward() { + + const DoReFa_Op& op_ = dynamic_cast<const DoReFa_Op&>(mOp); + + std::shared_ptr<Tensor> in0 = op_.getInput(0); + 
std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + + // Find the correct kernel type + auto impl = Registrar<DoReFaImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + gra_int0->size(), + op_.range(), + op_.mode(), + getCPUPtr(in0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); +} diff --git a/src/backend/cpu/operator/SAT/TanhClampImpl.cpp b/src/backend/cpu/operator/SAT/TanhClampImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f908715478572c11e2b30654334e620e965e2d32 --- /dev/null +++ b/src/backend/cpu/operator/SAT/TanhClampImpl.cpp @@ -0,0 +1,62 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/TanhClamp.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp" + +template<> +void Aidge::TanhClampImpl_cpu::forward() { + + const TanhClamp_Op& op_ = dynamic_cast<const TanhClamp_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> scaling = op_.getOutput(1); + + // Find the correct kernel type + auto impl = Registrar<TanhClampImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward( + in0->size(), + getCPUPtr(in0), + getCPUPtr(scaling), + getCPUPtr(out0)); +} + +template<> +void Aidge::TanhClampImpl_cpu::backward() { + const TanhClamp_Op& op_ = dynamic_cast<const TanhClamp_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> scaling = op_.getOutput(1); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + + // Find the correct kernel type + auto impl = Registrar<TanhClampImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + gra_int0->size(), + getCPUPtr(in0), + getCPUPtr(scaling), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); +} diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..879174032bfcf5b2958b0950d0ed7410ba83331c --- /dev/null +++ b/src/operator/FixedQ.cpp @@ -0,0 +1,30 @@ 
+/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/FixedQ.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::FixedQ_Op::Type = "FixedQ"; + +std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const { + return Registrar<FixedQ_Op>::getKeys(); +} + +void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(FixedQ_Op, *this, name); + mOutputs[0]->setBackend(name, device); +} diff --git a/src/operator/LSQ.cpp b/src/operator/LSQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1fd79365b0b70e8c4fa9f43df397e38f7a75e685 --- /dev/null +++ b/src/operator/LSQ.cpp @@ -0,0 +1,49 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/LSQ.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::LSQ_Op::Type = "LSQ"; + +bool Aidge::LSQ_Op::forwardDims(bool /*allowDataDependency*/) { + + // TODO : check if the step size is a scalar ! 
+ if (inputsAssociated()) { + const auto inputsDims = getInput(0)->dims(); + mOutputs[0]->resize(inputsDims); + return true; + } + return false; +} + +std::set<std::string> Aidge::LSQ_Op::getAvailableBackends() const { + return Registrar<LSQ_Op>::getKeys(); +} + +void Aidge::LSQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(LSQ_Op, *this, name); + mOutputs[0]->setBackend(name, device); + + // By default, automatically set backend for alphas inputs + if (getInput(1)) { + getInput(1)->setBackend(name, device); + } + else { + Log::notice("LSQ_Op::setBackend(): could not set backend for step_size input, because input is not connected"); + } +} diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6124bad0e5f04c8e22e2d16c48dd4fe5de7945a --- /dev/null +++ b/src/operator/SAT/DoReFa.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/SAT/DoReFa.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::DoReFa_Op::Type = "DoReFa"; + +std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const { + return Registrar<DoReFa_Op>::getKeys(); +} + +void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(DoReFa_Op, *this, name); + mOutputs[0]->setBackend(name, device); +} \ No newline at end of file diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b8d63d7136c45589cba92018d2ecafe17d54e4e --- /dev/null +++ b/src/operator/SAT/TanhClamp.cpp @@ -0,0 +1,44 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/SAT/TanhClamp.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::TanhClamp_Op::Type = "TanhClamp"; + +bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) { + + if (inputsAssociated()) { + const auto inputsDims = getInput(0)->dims(); + mOutputs[0]->resize(inputsDims); + mOutputs[1]->resize({1}); + return true; + } + return false; +} + +std::set<std::string> Aidge::TanhClamp_Op::getAvailableBackends() const { + return Registrar<TanhClamp_Op>::getKeys(); +} + +void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(TanhClamp_Op, *this, name); + mOutputs[0]->setBackend(name, device); + + // Scale output is always on CPU for now + mOutputs[1]->setBackend("cpu"); // XXX why ? +} \ No newline at end of file