Skip to content
Snippets Groups Projects
Commit 5f1820a1 authored by Maxence Naud's avatar Maxence Naud
Browse files

Merge branch 'low_bit_support' into 'dev'

Low bit support

See merge request !16
parents 1a4b72ef 10bfb0ac
No related branches found
No related tags found
No related merge requests found
Showing
with 23179 additions and 235 deletions
/*
(C) Copyright 2017 CEA LIST. All Rights Reserved.
Contributor(s): N2D2 Team
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL-C
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
*/
#ifndef __N2D2_EXPORT_ARM_CONV_CUSTOM_HPP__
#define __N2D2_EXPORT_ARM_CONV_CUSTOM_HPP__
#include <cmath>
#include "kernels/typedefs.hpp"
#include "assert.h"
#include "utils.hpp"
#include "kernels/Macs.hpp"
#include "kernels/subkernels_functions.hpp"
namespace N2D2_Export {
template<int NB_CHANNELS,
int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
int PADDING_Y, int PADDING_X,
int STRIDE_Y, int STRIDE_X,
int KERNEL_HEIGHT, int KERNEL_WIDTH,
ActivationFunction_T ACTIVATION,
// // Memory mapping: inputs
// int INPUT_MEM_CONT_OFFSET,
// int INPUT_MEM_CONT_SIZE,
// int INPUT_MEM_WRAP_OFFSET,
// int INPUT_MEM_WRAP_SIZE,
// int INPUT_MEM_STRIDE,
// // Memory mapping: outputs
// int OUTPUT_MEM_CONT_OFFSET,
// int OUTPUT_MEM_CONT_SIZE,
// int OUTPUT_MEM_WRAP_OFFSET,
// int OUTPUT_MEM_WRAP_SIZE,
// int OUTPUT_MEM_STRIDE,
typename Sum_T, typename Input_T, typename Output_T,
typename Weight_T, typename Bias_T, typename Rescaling_T>
__attribute__((always_inline)) inline static
void lowbitconvcellPropagate(const Input_T* __restrict inputs,
Output_T* __restrict outputs,
const Bias_T* __restrict biasses,
const Weight_T* __restrict weights,
const Rescaling_T& __restrict rescaling)
{
PackSupport infoPack = {0, 0};
constexpr int bits_norm_in = (std::numeric_limits<Input_T>::digits >= 8)
? 8/std::ceil(8/(float)std::numeric_limits<Input_T>::digits)
: 8/std::floor(8/(float)std::numeric_limits<Input_T>::digits);
constexpr int bits_norm_wt = (std::numeric_limits<Weight_T>::digits >= 8)
? 8/std::ceil(8/(float)std::numeric_limits<Weight_T>::digits)
: 8/std::floor(8/(float)std::numeric_limits<Weight_T>::digits);
constexpr int INPUTS_BYTE
= std::ceil(((NB_CHANNELS * bits_norm_in)
+ (NB_CHANNELS * bits_norm_in) % 8) / (float)8);
constexpr int WEIGHTS_BYTE
= std::ceil(((NB_CHANNELS * bits_norm_wt)
+ (NB_CHANNELS * bits_norm_wt) % 8) / (float)8);
int outputOffset = 0;
int iy = 0;
for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
const int syMin = (PADDING_Y == 0) ? 0 : max(PADDING_Y - iy, 0);
const int syMax = (PADDING_Y == 0) ? KERNEL_HEIGHT
: clamp(CHANNELS_HEIGHT + PADDING_Y - iy,
0, KERNEL_HEIGHT);
int ix = 0;
for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
const int sxMin = (PADDING_X == 0) ? 0 : max(PADDING_X - ix, 0);
const int sxMax = (PADDING_X == 0) ? KERNEL_WIDTH
: clamp(CHANNELS_WIDTH + PADDING_X - ix,
0, KERNEL_WIDTH);
for (int och = 0; och < NB_OUTPUTS; ++och) {
Sum_T weightedSum = biasses[och];
for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
if (PADDING_Y != 0 && (sy < syMin || sy >= syMax)) {
continue;
}
const int inputsOffset = (iy + sy - PADDING_Y) * CHANNELS_WIDTH * INPUTS_BYTE
+ (ix - PADDING_X) * INPUTS_BYTE;
const int weightsOffset = och * KERNEL_HEIGHT * KERNEL_WIDTH * WEIGHTS_BYTE
+ sy * KERNEL_WIDTH * WEIGHTS_BYTE;
// if (PADDING_X == 0
// && (NB_CHANNELS * std::numeric_limits<Weight_T>::digits % 8 == 0)
// && (NB_CHANNELS * std::numeric_limits<Input_T>::digits % 8 == 0)) {
if (PADDING_X == 0
&& (NB_CHANNELS * bits_norm_wt % 8 == 0)
&& (NB_CHANNELS * bits_norm_in % 8 == 0)) {
macsOnRange<KERNEL_WIDTH * NB_CHANNELS>(inputs + inputsOffset,
weights + weightsOffset,
weightedSum);
}
else {
for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
if(sx < sxMin || sx >= sxMax) {
continue;
}
macsOnRange<NB_CHANNELS>(inputs + inputsOffset + sx * INPUTS_BYTE,
weights + weightsOffset + sx * WEIGHTS_BYTE,
weightedSum);
}
}
}
Output_T output = sat<Output_T>(weightedSum,och, ACTIVATION, rescaling);
compact_data_during_loop(output, outputs, outputOffset, infoPack);
}
compact_data_end_loop(outputs, outputOffset, infoPack);
ix += STRIDE_X;
}
iy += STRIDE_Y;
}
}
} // N2D2_Export
#endif // __N2D2_EXPORT_ARM_CONV_CUSTOM_HPP__
/*
(C) Copyright 2017 CEA LIST. All Rights Reserved.
Contributor(s): N2D2 Team
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL-C
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
*/
#ifndef __N2D2_EXPORT_CPP_CUSTOMFC_HPP__
#define __N2D2_EXPORT_CPP_CUSTOMFC_HPP__
#include <cmath>
#include "kernels/typedefs.hpp"
#include "assert.h"
#include "utils.hpp"
#include "kernels/Macs.hpp"
#include "kernels/subkernels_functions.hpp"
namespace N2D2_Export {
template<int NB_CHANNELS, int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS, int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
ActivationFunction_T ACTIVATION,
typename Sum_T, typename Input_T, typename Output_T,
typename Weight_T, typename Bias_T, typename Rescaling_T>
__attribute__((always_inline)) inline static
void lowbitfccellPropagate(const Input_T* __restrict inputs,
Output_T* __restrict outputs,
const Bias_T* __restrict biasses,
const Weight_T* __restrict weights,
const Rescaling_T& __restrict rescaling)
{
static_assert(OUTPUTS_HEIGHT == 1, "Outputs height should be 1");
static_assert(OUTPUTS_WIDTH == 1, "Outputs width should be 1");
PackSupport infoPack = {0, 0};
constexpr int INPUTS_BYTE
= std::ceil(((NB_CHANNELS * std::numeric_limits<Input_T>::digits)
+ (NB_CHANNELS * std::numeric_limits<Input_T>::digits) % 8) / (float)8);
constexpr int WEIGHTS_BYTE
= std::ceil(((NB_CHANNELS * std::numeric_limits<Weight_T>::digits)
+ (NB_CHANNELS * std::numeric_limits<Weight_T>::digits) % 8) / (float)8);
int outputOffset = 0;
for (int och = 0; och < NB_OUTPUTS; ++och) {
Sum_T weightedSum = biasses[och];
for (int iy = 0; iy < CHANNELS_HEIGHT; ++iy) {
for (int ix = 0; ix < CHANNELS_WIDTH; ++ix) {
const int weightsOffset = CHANNELS_HEIGHT * CHANNELS_WIDTH * WEIGHTS_BYTE * och
+ (CHANNELS_WIDTH * iy + ix) * WEIGHTS_BYTE;
const int inputsOffset = (CHANNELS_WIDTH * iy + ix) * INPUTS_BYTE;
macsOnRange<NB_CHANNELS>(inputs + inputsOffset,
weights + weightsOffset,
weightedSum);
}
}
Output_T output = sat<Output_T>(weightedSum,och, ACTIVATION, rescaling);
compact_data_during_loop(output, outputs, outputOffset, infoPack);
}
compact_data_end_loop(outputs, outputOffset, infoPack);
}
} // N2D2_Export
#endif // __N2D2_EXPORT_CPP_CUSTOMFC_HPP__
/*
(C) Copyright 2017 CEA LIST. All Rights Reserved.
Contributor(s): N2D2 Team
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL-C
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
*/
#ifndef __N2D2_EXPORT_CPP_CUSTOMPOOLING_HPP__
#define __N2D2_EXPORT_CPP_CUSTOMPOOLING_HPP__
#include <cmath>
#include "kernels/typedefs.hpp"
#include "assert.h"
#include "utils.hpp"
#include "kernels/Macs.hpp"
#include "kernels/subkernels_functions.hpp"
namespace N2D2_Export {
/**
 * @brief   Max pooling layer working on bit-packed data
 * @details For each output position the channels are processed in groups of
 *          `nb_data` units: a 32-bit running maximum is updated by
 *          parallelMaxPooling for every kernel tap that overlaps the input,
 *          then written by storeMaxPooling, which receives `outputOffset`
 *          by reference.
 *          Only Max pooling with a Linear activation is accepted, and
 *          Input_T must be the same type as Output_T.
 *
 * @param[in]   inputs      Input feature map
 * @param[out]  outputs     Output feature map
 * @returns     None
 */
template<int NB_CHANNELS, int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS, int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         Pooling_T POOLING, ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T>
__attribute__((always_inline)) inline static
void lowbitpoolcellPropagate(const Input_T* __restrict inputs,
                             Output_T* __restrict outputs)
{
    static_assert(std::is_same<Input_T, Output_T>::value, "Input_T and Output_T must be the same.");
    static_assert(NB_CHANNELS == NB_OUTPUTS, "nb_channels should be equal to nb_outputs.");
    // The message now matches the condition: Average pooling is rejected here
    static_assert(POOLING == Max , "Only supports Max pooling.");
    static_assert(ACTIVATION == Linear, "Only supports a Linear activation.");

    // Per-pixel stride of the input and output buffers
    constexpr int INPUTS_BYTE
        = std::ceil(((NB_CHANNELS * std::numeric_limits<Input_T>::digits)
                     + (NB_CHANNELS * std::numeric_limits<Input_T>::digits) % 8) / (float)8);
    constexpr int OUTPUTS_BYTE
        = std::ceil(((NB_OUTPUTS * std::numeric_limits<Output_T>::digits)
                     + (NB_OUTPUTS * std::numeric_limits<Output_T>::digits) % 8) / (float)8);

    int outputOffset = 0;
    int iy = 0;

    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
        // Range of kernel rows overlapping the input for this output row
        const int syMin = (PADDING_Y == 0) ? 0 : max(PADDING_Y - iy, 0);
        const int syMax = (PADDING_Y == 0) ? KERNEL_HEIGHT
                            : clamp(CHANNELS_HEIGHT + PADDING_Y - iy,
                                    0, KERNEL_HEIGHT);
        int ix = 0;

        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
            // Range of kernel columns overlapping the input for this output column
            const int sxMin = (PADDING_X == 0) ? 0 : max(PADDING_X - ix, 0);
            const int sxMax = (PADDING_X == 0) ? KERNEL_WIDTH
                                : clamp(CHANNELS_WIDTH + PADDING_X - ix,
                                        0, KERNEL_WIDTH);

            int och_c = 0;
            while (och_c < OUTPUTS_BYTE) {
                // Running maximum: int32_t only for signed 32-digit inputs,
                // uint32_t (starting at 0) for everything else.
                // NOTE(review): a start value of 0 for signed sub-word inputs
                // looks too high when every value in the window is negative
                // - confirm.
                typename std::conditional<(!std::is_unsigned<Input_T>::value &&
                    std::numeric_limits<Input_T>::digits == 32), int32_t, uint32_t>::type maxVal;
                maxVal = std::numeric_limits<decltype(maxVal)>::lowest();

                // Number of units handled in this pass, limited by what is left
                int nb_data = min(OUTPUTS_BYTE-och_c, get_pool_nbData(std::numeric_limits<Input_T>::digits));

                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
                    if (PADDING_Y != 0 && (sy < syMin || sy >= syMax)) {
                        continue;
                    }

                    const int inputsOffset = (iy + sy - PADDING_Y) * CHANNELS_WIDTH * INPUTS_BYTE
                                             + (ix - PADDING_X) * INPUTS_BYTE + och_c;

                    for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
                        if(sx < sxMin || sx >= sxMax) {
                            continue;
                        }
                        parallelMaxPooling(inputs + inputsOffset + sx*INPUTS_BYTE, maxVal, nb_data);
                    }
                }

                storeMaxPooling(outputs, outputOffset, maxVal, nb_data);
                och_c += nb_data;
            }
            ix += STRIDE_X;
        }
        iy += STRIDE_Y;
    }
}
}
#endif
\ No newline at end of file
This diff is collapsed.
...@@ -27,15 +27,15 @@ ...@@ -27,15 +27,15 @@
namespace N2D2_Export { namespace N2D2_Export {
static int64_t toInt64(uint32_t lo, uint32_t hi) { // static int64_t toInt64(uint32_t lo, uint32_t hi) {
return (int64_t) (((uint64_t) hi) << 32ull) | ((uint64_t) lo); // return (int64_t) (((uint64_t) hi) << 32ull) | ((uint64_t) lo);
} // }
static int64_t smlal(int32_t lhs, int32_t rhs, // static int64_t smlal(int32_t lhs, int32_t rhs,
uint32_t accumLo, uint32_t accumHi) // uint32_t accumLo, uint32_t accumHi)
{ // {
return ((int64_t) lhs) * ((int64_t) rhs) + toInt64(accumLo, accumHi); // return ((int64_t) lhs) * ((int64_t) rhs) + toInt64(accumLo, accumHi);
} // }
// --------------------------------------------------- // ---------------------------------------------------
// ------------------- No Scaling -------------------- // ------------------- No Scaling --------------------
......
/**
******************************************************************************
* @file subkernels_functions.hpp
* @brief Header file for the network subkernels
*
******************************************************************************
* @attention
*
* (C) Copyright 2021 CEA LIST. All Rights Reserved.
* Contributor(s): Vincent TEMPLIER (vincent.templier@cea.fr)
*
* This file is not part of the open source version of N2D2 and is NOT under
* the CeCILL-C license. This code is the property of the CEA. It can not be
* copied or disseminated without its authorization.
*
******************************************************************************
*/
#ifndef __SUBKERNELS_FUNCTIONS_H__
#define __SUBKERNELS_FUNCTIONS_H__
#include <cstring>
#include <cmsis_compiler.h>
#include "typedefs.hpp"
#include "assert.h"
// ----------------------------------------------------------------------------
// -------------------------- Compression functions ---------------------------
// ----------------------------------------------------------------------------
/**
 * @brief   Compact data during a loop with an accumulator
 * @details This function is used in the network functions to compress
 *          and store a value in the outputs vector. The function adds
 *          the value to an accumulator. If the accumulator is full
 *          (i.e. all the available slots are taken), then the accumulator
 *          is stored in the outputs and reset. Otherwise, the accumulator
 *          temporarily keeps the previous values and is shifted left by
 *          the number of bits required to store one quantized value.
 *          This overload is selected for output types narrower than 8 bits.
 *
 * @param[in]       value           Value to be stored in the accumulator
 * @param[in,out]   outputs         Pointer to compressed output vector
 * @param[in,out]   outputOffset    Reference to the current output index,
 *                                  incremented each time the accumulator is stored
 * @param[in,out]   infoPack        Object containing the accumulator
 * @returns                         None
 *
 */
template<typename Output_T, typename std::enable_if_t<std::numeric_limits<Output_T>::digits < 8, int> = 0>
__attribute__((always_inline)) static inline
void compact_data_during_loop (Output_T value,
                               Output_T* __restrict outputs,
                               int& outputOffset,
                               PackSupport& infoPack)
{
    constexpr int nbBits = std::numeric_limits<Output_T>::digits;
    static_assert(nbBits > 0, "Output_T must report a non-zero bit width");

    constexpr uint8_t mask = (1U << nbBits) - 1;
    // Integer ceiling of 8 / nbBits; avoids calling the non-constexpr
    // floating-point ceil() inside a constant expression
    constexpr uint8_t nbSlot = (8 + nbBits - 1) / nbBits;

    infoPack.accumulator |= value.value & mask;
    infoPack.cptAccumulator += 1;

    if (infoPack.cptAccumulator == nbSlot) {
        // Accumulator full: store it and start a new one
        outputs[outputOffset] = (Output_T) infoPack.accumulator;
        ++outputOffset;
        infoPack.cptAccumulator = 0;
        infoPack.accumulator = 0;
    }
    else {
        // Make room for the next value
        infoPack.accumulator <<= nbBits;
    }
}
/**
 * @brief   Store a value in the outputs vector (no compression needed)
 * @details Overload selected for output types of 8 bits or more: the value
 *          is written as is at the current output index, then the index is
 *          advanced so that the next call writes to the following slot.
 *
 * @param[in]       value           Value to be stored
 * @param[in,out]   outputs         Pointer to the output vector
 * @param[in,out]   outputOffset    Reference to the current output index,
 *                                  incremented by one
 * @param[in,out]   infoPack        Unused by this overload
 * @returns                         None
 *
 */
template<typename Output_T, typename std::enable_if_t<std::numeric_limits<Output_T>::digits >= 8, int> = 0>
__attribute__((always_inline)) static inline
void compact_data_during_loop (const Output_T value,
                               Output_T* __restrict outputs,
                               int& outputOffset,
                               PackSupport& infoPack)
{
    (void) infoPack;    // no accumulator involved for full-width outputs

    outputs[outputOffset] = value;
    // The index was previously left untouched, so successive calls kept
    // overwriting the same element
    ++outputOffset;
}
/**
 * @brief   Compact data after a loop with an accumulator
 * @details It may happen that the accumulator is not completely filled
 *          after calling "compact_data_during_loop" and the stored
 *          quantized values in the accumulator have not been saved
 *          in the outputs. Thus, this function shifts the accumulator to
 *          the left (padding with zeros) until the first stored value
 *          reaches the position it would have in a full accumulator.
 *          Then the accumulator is stored in the outputs and reset.
 *          Nothing happens when no value is pending.
 *          This function should always be called at the end of a loop
 *          where "compact_data_during_loop" is called.
 *          This overload is selected for output types narrower than 8 bits.
 *
 * @param[in,out]   outputs         Pointer to compressed output vector
 * @param[in,out]   outputOffset    Reference to the current output index,
 *                                  incremented when the accumulator is stored
 * @param[in,out]   infoPack        Object containing the accumulator
 * @returns                         None
 *
 */
template<typename Output_T, typename std::enable_if_t<std::numeric_limits<Output_T>::digits < 8, int> = 0>
__attribute__((always_inline)) static inline
void compact_data_end_loop (Output_T* __restrict outputs,
                            int& outputOffset,
                            PackSupport& infoPack)
{
    // Nothing accumulated since the last store
    if (infoPack.cptAccumulator == 0) {
        return;
    }

    constexpr int nbBits = std::numeric_limits<Output_T>::digits;
    static_assert(nbBits > 0, "Output_T must report a non-zero bit width");

    // Integer ceiling of 8 / nbBits; avoids calling the non-constexpr
    // floating-point ceil() inside a constant expression
    constexpr unsigned int nbSlot = (8 + nbBits - 1) / nbBits;

    // The accumulator was already shifted once after the last value, hence
    // the extra increment before padding the remaining slots
    infoPack.cptAccumulator += 1;
    while (infoPack.cptAccumulator < nbSlot) {
        infoPack.accumulator <<= nbBits;
        infoPack.cptAccumulator += 1;
    }

    outputs[outputOffset] = infoPack.accumulator;
    ++outputOffset;
    infoPack.cptAccumulator = 0;
    infoPack.accumulator = 0;
}
/**
 * @brief   End-of-loop counterpart of "compact_data_during_loop" for
 *          output types of 8 bits or more
 * @details Intentionally empty: this overload touches none of its arguments.
 *
 * @param[in]   outputs         Unused
 * @param[in]   outputOffset    Unused
 * @param[in]   infoPack        Unused
 * @returns                     None
 *
 */
template<typename Output_T,
         typename std::enable_if_t<std::numeric_limits<Output_T>::digits >= 8, int> = 0>
__attribute__((always_inline)) static inline
void compact_data_end_loop (Output_T* __restrict outputs,
                            int& outputOffset,
                            PackSupport& infoPack)
{
    // No accumulator to flush
}
// ----------------------------------------------------------------------------
// ------------------------- Pooling subfunctions -----------------------------
// ------------------------------ Max Pooling ---------------------------------
// ----------------------------------------------------------------------------
/**
 * @brief   Number of data units handled by one max pooling pass
 * @details Returns 4 for 8-bit data, 2 for 4-bit and 16-bit data,
 *          and 1 for any other width.
 *
 * @param[in]   nbBits  Bit width of the pooled data
 * @returns             Number of data units per pass
 *
 */
__attribute__((always_inline)) static inline
int get_pool_nbData (const int nbBits)
{
    if (nbBits == 8) {
        return 4;
    }
    if (nbBits == 4 || nbBits == 16) {
        return 2;
    }
    return 1;
}
/**
 * @brief   Store the result of a 4-bit max pooling pass
 * @details `maxVal` holds one 4-bit value in the low nibble of each of its
 *          four bytes. The nibbles are regrouped two by two
 *          (ex: 0x0A050403 -> 0x00A50043), then `nb_data` packed bytes are
 *          written to the outputs, taken from bits [7:0] and [23:16].
 *
 * @param[in,out]   outputs         Pointer to compressed output vector
 * @param[in,out]   outputOffset    Reference to the current output index,
 *                                  incremented by `nb_data`
 * @param[in]       maxVal          Maximum values, one per byte
 * @param[in]       nb_data         Number of bytes to store (1 or 2)
 * @returns                         None
 *
 */
template<typename Output_T,
         typename std::enable_if<std::numeric_limits<Output_T>::digits == 4>::type* = nullptr>
__attribute__((always_inline)) static inline
void storeMaxPooling (Output_T* __restrict outputs,
                      int& outputOffset,
                      const uint32_t maxVal,
                      const int nb_data)
{
    assert(nb_data == 2 || nb_data == 1);

    // Move the nibble of every odd byte next to the nibble of the byte below
    const uint32_t highNibbles = (maxVal & 0x0F000F00) >> 4;
    const uint32_t lowNibbles = maxVal & 0x000F000F;
    const uint32_t packed = highNibbles | lowNibbles;

    // One packed byte sits in each 16-bit half of the word
    for (int half = 0; half < nb_data; ++half) {
        outputs[outputOffset] = (uint8_t) ((packed >> 16*half) & 0xFF);
        ++outputOffset;
    }
}
/**
 * @brief   Store the result of an 8-bit max pooling pass
 * @details Copies the first `nb_data` bytes of `maxVal` (in memory order)
 *          to the outputs at the current output index, then advances the
 *          index by `nb_data`, as the 4-bit overload does.
 *
 * @param[in,out]   outputs         Pointer to the output vector
 * @param[in,out]   outputOffset    Reference to the current output index,
 *                                  incremented by `nb_data`
 * @param[in]       maxVal          Maximum values, one per byte
 * @param[in]       nb_data         Number of bytes to store
 * @returns                         None
 *
 */
template<typename Output_T,
         typename std::enable_if<std::numeric_limits<Output_T>::digits == 8>::type* = nullptr>
__attribute__((always_inline)) static inline
void storeMaxPooling (Output_T* __restrict outputs,
                      int& outputOffset,
                      const uint32_t maxVal,
                      const int nb_data)
{
    // Write at the current position; the copy used to target the start of
    // the buffer and the index was never advanced
    memcpy(outputs + outputOffset, &maxVal, nb_data*sizeof(uint8_t));
    outputOffset += nb_data;
}
/**
 * @brief   Update a running maximum with unsigned 16-bit inputs
 * @details Loads `nb_data` 16-bit values into a 32-bit word (unused lanes
 *          stay at 0) and combines them lane by lane with `maxVal`:
 *          a saturating subtraction followed by a saturating addition of
 *          the same operand, i.e. max(a, b) = sat(a - b) + b.
 *
 * @param[in]       inputs      Pointer to the values to compare
 * @param[in,out]   maxVal      Running maximum, two 16-bit lanes
 * @param[in]       nb_data     Number of 16-bit values to load (1 or 2)
 * @returns                     None
 *
 */
template<typename Input_T,
         typename std::enable_if<(std::is_unsigned<Input_T>::value
         && std::numeric_limits<Input_T>::digits == 16)>::type* = nullptr>
__attribute__((always_inline)) static inline
void parallelMaxPooling (const Input_T* __restrict inputs,
                         uint32_t& maxVal,
                         const int nb_data)
{
    assert(nb_data == 2 || nb_data == 1);

    uint32_t lanes = 0;
    memcpy((void*) &lanes, inputs, nb_data*sizeof(uint16_t));

    const uint32_t excess = __UQSUB16(maxVal, lanes);
    maxVal = __UQADD16(excess, lanes);
}
/**
 * @brief   Update a running maximum with signed 16-bit inputs
 * @details Loads `nb_data` 16-bit values into a 32-bit word (unused lanes
 *          stay at 0) and combines them lane by lane with `maxVal`:
 *          __SSUB16 computes maxVal - in, __SEL keeps that difference only
 *          in the lanes selected by the flags of the subtraction (0
 *          elsewhere), and __SADD16 adds `in` back.
 *          `maxVal` is taken by reference, like in the unsigned overloads,
 *          so that the caller actually receives the updated maximum.
 *
 * @param[in]       inputs      Pointer to the values to compare
 * @param[in,out]   maxVal      Running maximum, two 16-bit lanes
 * @param[in]       nb_data     Number of 16-bit values to load (1 or 2)
 * @returns                     None
 *
 */
template<typename Input_T,
         typename std::enable_if<(!std::is_unsigned<Input_T>::value
         && std::numeric_limits<Input_T>::digits == 16)>::type* = nullptr>
__attribute__((always_inline)) static inline
void parallelMaxPooling (const Input_T* __restrict inputs,
                         uint32_t& maxVal,
                         const int nb_data)
{
    assert(nb_data == 2 || nb_data == 1);

    uint32_t in = 0;
    memcpy((void*) &in, inputs, nb_data*sizeof(uint16_t));

    // Keep the three intrinsics back to back: __SEL reads the flags
    // produced by __SSUB16
    maxVal = __SSUB16(maxVal, in);
    maxVal = __SEL(maxVal, 0);
    maxVal = __SADD16(maxVal, in);
}
/**
 * @brief   Update a running maximum with unsigned 8-bit inputs
 * @details Loads `nb_data` bytes into a 32-bit word (unused lanes stay
 *          at 0) and combines them lane by lane with `maxVal`:
 *          a saturating subtraction followed by a saturating addition of
 *          the same operand, i.e. max(a, b) = sat(a - b) + b.
 *
 * @param[in]       inputs      Pointer to the values to compare
 * @param[in,out]   maxVal      Running maximum, four 8-bit lanes
 * @param[in]       nb_data     Number of bytes to load (1 to 4)
 * @returns                     None
 *
 */
template<typename Input_T,
         typename std::enable_if<(std::is_unsigned<Input_T>::value
         && std::numeric_limits<Input_T>::digits == 8)>::type* = nullptr>
__attribute__((always_inline)) static inline
void parallelMaxPooling (const Input_T* __restrict inputs,
                         uint32_t& maxVal,
                         const int nb_data)
{
    assert(nb_data >= 1 && nb_data <= 4);

    uint32_t lanes = 0;
    memcpy((void*) &lanes, inputs, nb_data*sizeof(uint8_t));

    const uint32_t excess = __UQSUB8(maxVal, lanes);
    maxVal = __UQADD8(excess, lanes);
}
/**
 * @brief   Update a running maximum with signed 8-bit inputs
 * @details Loads `nb_data` bytes into a 32-bit word (unused lanes stay
 *          at 0) and combines them lane by lane with `maxVal`:
 *          __SSUB8 computes maxVal - in, __SEL keeps that difference only
 *          in the lanes selected by the flags of the subtraction (0
 *          elsewhere), and __SADD8 adds `in` back.
 *          `maxVal` is taken by reference, like in the unsigned overloads,
 *          so that the caller actually receives the updated maximum.
 *
 * @param[in]       inputs      Pointer to the values to compare
 * @param[in,out]   maxVal      Running maximum, four 8-bit lanes
 * @param[in]       nb_data     Number of bytes to load (1 to 4)
 * @returns                     None
 *
 */
template<typename Input_T,
         typename std::enable_if<(!std::is_unsigned<Input_T>::value
         && std::numeric_limits<Input_T>::digits == 8)>::type* = nullptr>
__attribute__((always_inline)) static inline
void parallelMaxPooling (const Input_T* __restrict inputs,
                         uint32_t& maxVal,
                         const int nb_data)
{
    assert(nb_data <= 4 && nb_data >= 1);

    uint32_t in = 0;
    memcpy((void*) &in, inputs, nb_data*sizeof(uint8_t));

    // Keep the three intrinsics back to back: __SEL reads the flags
    // produced by __SSUB8
    maxVal = __SSUB8(maxVal, in);
    maxVal = __SEL(maxVal, 0);
    maxVal = __SADD8(maxVal, in);
}
/**
 * @brief   Update a running maximum with unsigned 4-bit inputs
 * @details Loads `nb_data` bytes (two 4-bit values each), spreads the
 *          nibbles so that each one occupies the low half of its own byte,
 *          then combines the four lanes with `maxVal`: a saturating
 *          subtraction followed by a saturating addition of the same
 *          operand, i.e. max(a, b) = sat(a - b) + b.
 *
 * @param[in]       inputs      Pointer to the packed values to compare
 * @param[in,out]   maxVal      Running maximum, one 4-bit value per byte
 * @param[in]       nb_data     Number of packed bytes to load (1 or 2)
 * @returns                     None
 *
 */
template<typename Input_T,
         typename std::enable_if<(std::is_unsigned<Input_T>::value
         && std::numeric_limits<Input_T>::digits == 4)>::type* = nullptr>
__attribute__((always_inline)) static inline
void parallelMaxPooling (const Input_T* __restrict inputs,
                         uint32_t& maxVal,
                         const int nb_data)
{
    assert(nb_data == 2 || nb_data == 1);

    uint32_t lanes = 0;
    memcpy((void*) &lanes, inputs, nb_data*sizeof(uint8_t));

    // 0x0000ABCD -> 0x00AB00CD -> 0x0A0B0C0D
    lanes = (lanes | lanes << 8) & 0xFF00FF;
    lanes = (lanes | lanes << 4) & 0xF0F0F0F;

    const uint32_t excess = __UQSUB8(maxVal, lanes);
    maxVal = __UQADD8(excess, lanes);
}
/**
 * @brief   Update a running maximum with signed 4-bit inputs
 * @details Loads `nb_data` bytes (two 4-bit values each), spreads the
 *          nibbles so that each one occupies the low half of its own byte,
 *          sign-extends every nibble to 8 bits, then combines the four
 *          lanes with `maxVal`: __SSUB8 computes maxVal - in, __SEL keeps
 *          that difference only in the lanes selected by the flags of the
 *          subtraction (0 elsewhere), and __SADD8 adds `in` back.
 *          `maxVal` is taken by reference, like in the unsigned overloads,
 *          so that the caller actually receives the updated maximum.
 *
 * @param[in]       inputs      Pointer to the packed values to compare
 * @param[in,out]   maxVal      Running maximum, one value per byte
 * @param[in]       nb_data     Number of packed bytes to load (1 or 2)
 * @returns                     None
 *
 */
template<typename Input_T,
         typename std::enable_if<(!std::is_unsigned<Input_T>::value
         && std::numeric_limits<Input_T>::digits == 4)>::type* = nullptr>
__attribute__((always_inline)) static inline
void parallelMaxPooling (const Input_T* __restrict inputs,
                         uint32_t& maxVal,
                         const int nb_data)
{
    assert(nb_data == 2 || nb_data == 1);

    uint32_t in = 0;
    memcpy((void*) &in, inputs, nb_data*sizeof(uint8_t));

    // 0x0000ABCD -> 0x00AB00CD -> 0x0A0B0C0D
    in = (in | in << 8) & 0xFF00FF;
    in = (in | in << 4) & 0xF0F0F0F;

    // Sign-extend each nibble to a byte: values 0x8..0xF carry into bit 7
    // when 0x78 is added, and the XOR then sets bits 3 to 7 (0x8 -> 0xF8)
    in += 0x78787878;
    in ^= 0x78787878;

    // Keep the three intrinsics back to back: __SEL reads the flags
    // produced by __SSUB8
    maxVal = __SSUB8(maxVal, in);
    maxVal = __SEL(maxVal, 0);
    maxVal = __SADD8(maxVal, in);
}
#endif
\ No newline at end of file
/**
******************************************************************************
* @file swar_arm_acle.h
* @brief Complete ARM Non-NEON ACLE intrinsics for Cortex m7 and m4
*
******************************************************************************
* @attention
*
* (C) Copyright 2021 CEA LIST. All Rights Reserved.
* Contributor(s): Vincent TEMPLIER (vincent.templier@cea.fr)
* Philippe DORE (philippe.dore@cea.fr)
*
* This file is not part of the open source version of N2D2 and is NOT under
* the CeCILL-C license. This code is the property of the CEA. It can not be
* copied or disseminated without its authorization.
*
******************************************************************************
*/
#ifndef _SWAR_ARM_ACLE_H
#define _SWAR_ARM_ACLE_H
#include <cmsis_compiler.h>
#include "assert.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Rotate right and perform dual extracted 8-bit to 16-bit signed addition
* @details This function rotates op2, extracts two 8-bit values from op2 (at bit positions [7:0] and [23:16]),
* sign-extends them to 16 bits each, and adds the results to op1
* @param[in] op1 Two 16-bit values in op1[15:0] and op1[31:16]
* @param[in] op2 Two 8-bit values in op2[7:0] and op2[23:16] to be sign-extended
* @param[in] ror Number of bits to rotate op2. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns The addition of op1 and op2, where op2 has been rotated, the 8-bit values in op2[7:0]
* and op2[23:16] have been extracted and sign-extended prior to the addition
*
*/
__attribute__((always_inline)) __STATIC_INLINE
int32_t __SXTAB16_RORn (const int32_t op1, const int32_t op2, const int8_t ror)
{
int32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("sxtab16 %0, %1, %2, ROR %3" : "=r" (result) : "r" (op1) , "r" (op2) , "i" (ror) );
return result;
}
/**
* @brief Rotate right, dual extract 8-bits and sign extend each to 16-bits
* @param[in] op1 Two 8-bit values in op1[7:0] and op1[23:16] (after rotation) to be sign-extended
* @param[in] ror Number of bits to rotate op1. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns The 8-bit values sign-extended to 16-bit values
*
*/
__attribute__((always_inline)) __STATIC_INLINE
int32_t __SXTB16_RORn (const int32_t op1, const int8_t ror)
{
int32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("sxtb16 %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (ror) );
return result;
}
/**
* @brief Rotate right and perform dual extracted 8-bit to 16-bit zero addition
* @details This function rotates op2, extracts two 8-bit values from op2 (at bit positions [7:0] and [23:16]),
* zero-extends them to 16 bits each, and adds the results to op1
* @param[in] op1 Two 16-bit values in op1[15:0] and op1[31:16]
* @param[in] op2 Two 8-bit values in op2[7:0] and op2[23:16] to be zero-extended
* @param[in] ror Number of bits to rotate op2. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns The addition of op1 and op2, where op2 has been rotated, the 8-bit values in op2[7:0]
* and op2[23:16] have been extracted and zero-extended prior to the addition
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UXTAB16_RORn (const uint32_t op1, const uint32_t op2, const int8_t ror)
{
uint32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("uxtab16 %0, %1, %2, ROR %3" : "=r" (result) : "r" (op1) , "r" (op2) , "i" (ror) );
return result;
}
/**
* @brief Rotate right, dual extract 8-bits and zero extend each to 16-bits
* @param[in] op1 Two 8-bit values in op1[7:0] and op1[23:16] (after rotation) to be zero-extended
* @param[in] ror Number of bits to rotate op1. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns The 8-bit values zero-extended to 16-bit values
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UXTB16_RORn (const uint32_t op1, const int8_t ror)
{
uint32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("uxtb16 %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (ror) );
return result;
}
/**
* @brief Sign extend Halfword
* @details Extends a 16-bit value to a signed 32-bit value
* @param[in] op1 op1[15:0] to be sign-extended
* @returns Register holding the sign-extended 32-bit value. The return type is
* uint32_t: cast the result to int32_t to read it as a signed number
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __SXTH (const uint32_t op1)
{
uint32_t result;
__ASM volatile ("sxth %0, %1" : "=r" (result) : "r" (op1));
return result;
}
/**
* @brief Zero extend Halfword
* @details Extends a 16-bit value to an unsigned 32-bit value
* (bits [31:16] of the result are cleared)
* @param[in] op1 op1[15:0] to be zero-extended
* @returns Register holding the zero-extended 32-bit value
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UXTH (const uint32_t op1)
{
uint32_t result;
__ASM volatile ("uxth %0, %1" : "=r" (result) : "r" (op1));
return result;
}
/**
* @brief Rotate right and sign extend halfword
* @param[in] op1 Value whose bits [15:0] (after rotation) are sign-extended
* @param[in] ror Number of bits to rotate op1. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns Register holding the sign-extended 32-bit value. The return type is
* uint32_t: cast the result to int32_t to read it as a signed number
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __SXTH_RORn (const uint32_t op1, const int8_t ror)
{
uint32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("sxth %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (ror) );
return result;
}
/**
* @brief Rotate right and zero extend halfword
* @param[in] op1 Value whose bits [15:0] (after rotation) are zero-extended
* @param[in] ror Number of bits to rotate op1. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns Register holding the zero-extended 32-bit value
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UXTH_RORn (const uint32_t op1, const int8_t ror)
{
uint32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("uxth %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (ror) );
return result;
}
/**
* @brief Sign extend Byte
* @details Extends an 8-bit value to a signed 32-bit value
* @param[in] op1 op1[7:0] to be sign-extended
* @returns Register holding the sign-extended 32-bit value. The return type is
* uint32_t: cast the result to int32_t to read it as a signed number
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __SXTB (const uint32_t op1)
{
uint32_t result;
__ASM volatile ("sxtb %0, %1" : "=r" (result) : "r" (op1));
return result;
}
/**
* @brief Zero extend Byte
* @details Extends an 8-bit value to an unsigned 32-bit value
* (bits [31:8] of the result are cleared)
* @param[in] op1 op1[7:0] to be zero-extended
* @returns Register holding the zero-extended 32-bit value
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UXTB (const uint32_t op1)
{
uint32_t result;
__ASM volatile ("uxtb %0, %1" : "=r" (result) : "r" (op1));
return result;
}
/**
* @brief Rotate right and sign extend byte
* @param[in] op1 Value whose bits [7:0] (after rotation) are sign-extended
* @param[in] ror Number of bits to rotate op1. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns Register holding the sign-extended 32-bit value. The return type is
* uint32_t: cast the result to int32_t to read it as a signed number
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __SXTB_RORn (const uint32_t op1, const int8_t ror)
{
uint32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("sxtb %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (ror) );
return result;
}
/**
* @brief Rotate right and zero extend byte
* @param[in] op1 Value whose bits [7:0] (after rotation) are zero-extended
* @param[in] ror Number of bits to rotate op1. Only 0, 8, 16 and 24 are accepted (checked by the assert).
* It is bound with the "i" (immediate) asm constraint, so its value must be
* known at compile time once the call is inlined
* @returns Register holding the zero-extended 32-bit value
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UXTB_RORn (const uint32_t op1, const int8_t ror)
{
uint32_t result;
// Reject rotation amounts other than 0, 8, 16 and 24
assert((ror == 0) || (ror == 8) || (ror == 16) || (ror == 24));
__ASM volatile ("uxtb %0, %1, ROR %2" : "=r" (result) : "r" (op1), "i" (ror) );
return result;
}
/**
 * @brief       Signed Bit Field Extract
 * @details     Copies adjacent bits from one register into the least significant bits
 *              of a second register, and sign extends to 32 bits
 * @param[in]   op1     Value to be extracted
 * @param[in]   lsb     Position of the least significant bit of the bit field (0 to 31)
 * @param[in]   width   Width of the bit field (1 to 32-lsb)
 * @returns     Extracted bitfield and sign extended to 32 bits
 * @note        lsb and width are bound with the "i" (immediate) asm constraint,
 *              so their values must be known at compile time once the call is inlined
 *
 */
__attribute__((always_inline)) __STATIC_INLINE
int32_t __SBFX (const uint32_t op1, const int8_t lsb, const int8_t width)
{
    int32_t result;
    // The field must hold at least one bit and may extend up to bit 31 included
    assert((lsb >= 0) && (lsb < 32) && (width >= 1) && (width <= 32-lsb));
    __ASM volatile ("sbfx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "i" (lsb), "i" (width) );
    return result;
}
/**
 * @brief       Unsigned Bit Field Extract
 * @details     Copies adjacent bits from one register into the least significant bits
 *              of a second register, and zero extends to 32 bits
 * @param[in]   op1     Value to be extracted
 * @param[in]   lsb     Position of the least significant bit of the bit field (0 to 31)
 * @param[in]   width   Width of the bit field (1 to 32-lsb)
 * @returns     Extracted bitfield and zero extended to 32 bits
 * @note        lsb and width are bound with the "i" (immediate) asm constraint,
 *              so their values must be known at compile time once the call is inlined
 *
 */
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UBFX (const uint32_t op1, const int8_t lsb, const int8_t width)
{
    uint32_t result;
    // The field must hold at least one bit and may extend up to bit 31 included
    assert((lsb >= 0) && (lsb < 32) && (width >= 1) && (width <= 32-lsb));
    __ASM volatile ("ubfx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "i" (lsb), "i" (width) );
    return result;
}
/**
 * @brief       Bit Field Insert
 * @details     Copies a bitfield into one register from another register
 *              It replaces width bits in op2 starting at the position lsb,
 *              with width bits from op1 starting at bit[0].
 *              Other bits in op2 are unchanged
 * @param[in]   op1     Source value
 * @param[in]   op2     Destination value (taken by value; the updated copy is returned)
 * @param[in]   lsb     Position of the least significant bit of the bit field (0 to 31)
 * @param[in]   width   Width of the bit field (1 to 32-lsb)
 * @returns     The register which contains op2 and the added bitfield
 * @note        lsb and width are bound with the "i" (immediate) asm constraint,
 *              so their values must be known at compile time once the call is inlined
 *
 */
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __BFI (uint32_t op1, uint32_t op2, const int8_t lsb, const int8_t width)
{
    // The field must hold at least one bit and may extend up to bit 31 included
    assert((lsb >= 0) && (lsb < 32) && (width >= 1) && (width <= 32-lsb));
    // "+r" already declares op2 as read and written, so no extra matching
    // input operand is needed
    __ASM volatile ("bfi %0, %1, %2, %3" : "+r" (op2) : "r" (op1), "i" (lsb), "i" (width) );
    return op2;
}
/**
* @brief Signed Divide
* @details Performs a signed integer division of the value in op1
* by the value in op2.
* The operands and the result are declared as uint32_t; the "sdiv"
* instruction interprets their bit patterns as signed values, so
* cast the result to int32_t to read it as a signed number.
* @param[in] op1 Register holding the value to be divided
* @param[in] op2 Register holding the divisor
* @returns Register holding the signed result op1/op2
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __SDIV (const uint32_t op1, const uint32_t op2)
{
uint32_t result;
__ASM volatile ("sdiv %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return result;
}
/**
* @brief Unsigned Divide
* @details Performs an unsigned integer division of the value in op1
* by the value in op2, through the "udiv" instruction.
* @param[in] op1 Register holding the value to be divided
* @param[in] op2 Register holding the divisor
* @returns Register holding the unsigned result op1/op2
*
*/
__attribute__((always_inline)) __STATIC_INLINE
uint32_t __UDIV (const uint32_t op1, const uint32_t op2)
{
uint32_t result;
__ASM volatile ("udiv %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
return result;
}
#ifdef __cplusplus
}
#endif
#endif
/*
(C) Copyright 2015 CEA LIST. All Rights Reserved.
Contributor(s): N2D2 Team
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL-C
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
*/
#ifndef __N2D2_TYPEDEFS_H__
#define __N2D2_TYPEDEFS_H__
#include <stdint.h>
/* Data layout tag with two values, HWC and CHW.
 * NOTE(review): presumably the dimension ordering of tensors
 * (height-width-channel vs channel-height-width) -- confirm against users. */
typedef enum {
HWC,
CHW
} Format_T;
/* Selects the activation function of a layer. The kernels take it as a
 * compile-time template argument (see the ACTIVATION template parameter of
 * the convolution kernel). Enumerator values are implicit, starting at 0,
 * so the declaration order below is part of the interface. */
typedef enum {
Logistic,
LogisticWithLoss,
FastSigmoid,
Tanh,
TanhLeCun,
Saturation,
Rectifier,
Linear,
Softplus
} ActivationFunction_T;
/* Pooling variant: Max or Average.
 * NOTE(review): presumably the type behind a layer's *_POOLING_TYPE symbol
 * -- confirm against the pooling kernel. */
typedef enum {
Max,
Average
} Pooling_T;
/* Operation mode: Sum or Mult.
 * NOTE(review): presumably selects how an element-wise layer combines its
 * inputs -- confirm against the kernels that consume OpMode_T. */
typedef enum {
Sum,
Mult
} OpMode_T;
/* Coefficient mode: PerLayer, PerInput or PerChannel.
 * NOTE(review): presumably the granularity at which a coefficient is shared
 * -- confirm against the kernels that consume CoeffMode_T. */
typedef enum {
PerLayer,
PerInput,
PerChannel
} CoeffMode_T;
#endif // __N2D2_TYPEDEFS_H__
This diff is collapsed.
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#include <cstring> #include <cstring>
#include <limits> #include <limits>
#include "typedefs.h" #include "kernels/typedefs.hpp"
namespace N2D2_Export { namespace N2D2_Export {
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
#ifndef {{ name|upper }}_LAYER_H #ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H
#include "typedefs.h" #include "kernels/nn_scaling_functions.hpp"
#include "nn_scaling_functions.hpp"
{% include "./_def_io.jinja" %} {% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %} {% include "./_meminfo.jinja" %}
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
#ifndef {{ name|upper }}_LAYER_H #ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H
#include "typedefs.h" #include "kernels/nn_scaling_functions.hpp"
#include "nn_scaling_functions.hpp"
{# For layer configuration -#} {# For layer configuration -#}
{% include "./_def_io.jinja" %} {% include "./_def_io.jinja" %}
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#ifndef {{ name|upper }}_LAYER_H #ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H
#include "typedefs.h" {# #include "typedefs.h" #}
{# For layer configuration -#} {# For layer configuration -#}
{% include "./_def_io.jinja" %} {% include "./_def_io.jinja" %}
......
#include <stdint.h>
#ifdef SAVE_OUTPUTS
#include <sys/types.h>
#include <sys/stat.h>
#endif
#include "include/forward.hpp"
// Layer & memory configurations
{%- for header in headers %}
#include "{{ header }}"
{%- endfor %}
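// Memory block: static buffer of peak_mem elements of type mem_ctype,
// placed in a dedicated linker section when mem_section is set.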
{%- if mem_section == None %}
static {{mem_ctype}} mem[{{peak_mem}}];
{%- else %}
static {{mem_ctype}} mem[{{peak_mem}}] __attribute__((section("{{ mem_section }}")));
{%- endif %}
{# Forward function: emits every entry of `actions` in order, then stores each output buffer pointer into its caller-provided `<output>_ptr` out-parameter. #}
{#- Signature: one `const <dtype>*` parameter per input and one `<dtype>**` out-parameter per output; each takes its own entry of inputs_dtype / outputs_dtype, so inputs and outputs may use different datatypes. -#}
void {{ func_name }} (
{%- for i in range(inputs_name | length) -%}
const {{ inputs_dtype[i] }}* {{ inputs_name[i] }},
{%- endfor -%}
{%- for o in range(outputs_name | length) -%}
{{ outputs_dtype[o] }}** {{ outputs_name[o] }}_ptr{% if not loop.last %}, {% endif %}
{%- endfor -%})
{
{%- for action in actions %}
{{ action }}
{%- endfor %}
{%- for output_name in outputs_name %}
*{{ output_name }}_ptr = {{ output_name }};
{%- endfor %}
}
{% filter indent(width=4, first=False) %}
{#- Emits the call to N2D2_Export::lowbitconvcellPropagate for this layer.
    Template arguments: input channels/height/width, output count/height/width,
    padding, stride, kernel size and activation symbols of the layer, then data<32>.
    Call arguments: in_name[0], out_name[0], in_name[2], in_name[1], <NAME>_SCALING.
    NOTE(review): in_name[2] is passed before in_name[1] (presumably biases, then
    weights); confirm against the kernel's parameter order. #}
{% include "./_mem_offset.jinja" %}
N2D2_Export::lowbitconvcellPropagate<{{ in_name[0]|upper }}_NB_CHANNELS,
{{ in_name[0]|upper }}_IN_HEIGHT,
{{ in_name[0]|upper }}_IN_WIDTH,
{{ out_name[0]|upper }}_NB_OUTPUTS,
{{ out_name[0]|upper }}_OUT_HEIGHT,
{{ out_name[0]|upper }}_OUT_WIDTH,
{{ name|upper }}_PADDING_Y,
{{ name|upper }}_PADDING_X,
{{ name|upper }}_STRIDE_Y,
{{ name|upper }}_STRIDE_X,
{{ name|upper }}_KERNEL_HEIGHT,
{{ name|upper }}_KERNEL_WIDTH,
{{ name|upper }}_ACTIVATION, data<32>>
({{in_name[0]}}, {{out_name[0]}}, {{in_name[2]}}, {{in_name[1]}}, {{ name|upper }}_SCALING);
{% endfilter %}
{% filter indent(width=4, first=False) %}
{#- Emits the call to N2D2_Export::lowbitfccellPropagate for this layer.
    Template arguments: input channels/height/width, output count/height/width
    and activation symbols of the layer, then data<32>.
    Call arguments: in_name[0], out_name[0], in_name[2], in_name[1], <NAME>_SCALING.
    NOTE(review): in_name[2] is passed before in_name[1] (presumably biases, then
    weights); confirm against the kernel's parameter order. #}
{% include "./_mem_offset.jinja" %}
N2D2_Export::lowbitfccellPropagate<{{ in_name[0] | upper }}_NB_CHANNELS,
{{ in_name[0] | upper }}_IN_HEIGHT,
{{ in_name[0] | upper }}_IN_WIDTH,
{{ out_name[0] | upper }}_NB_OUTPUTS,
{{ out_name[0] | upper }}_OUT_HEIGHT,
{{ out_name[0] | upper }}_OUT_WIDTH,
{{name|upper}}_ACTIVATION,
data<32>>
({{ in_name[0] }}, {{ out_name[0] }}, {{ in_name[2] }}, {{ in_name[1] }}, {{ name | upper }}_SCALING);
{% endfilter %}
\ No newline at end of file
{% filter indent(width=4, first=False) %}
{#- Emits the call to N2D2_Export::lowbitpoolcellPropagate for this layer.
    Template arguments: input channels/height/width, output count/height/width,
    padding, stride, kernel size, pooling type and activation symbols of the layer.
    Call arguments: only the input buffer in_name[0] and the output buffer out_name[0]. #}
{% include "./_mem_offset.jinja" %}
N2D2_Export::lowbitpoolcellPropagate<{{ in_name[0]|upper }}_NB_CHANNELS,
{{ in_name[0]|upper }}_IN_HEIGHT,
{{ in_name[0]|upper }}_IN_WIDTH,
{{ out_name[0]|upper }}_NB_OUTPUTS,
{{ out_name[0]|upper }}_OUT_HEIGHT,
{{ out_name[0]|upper }}_OUT_WIDTH,
{{ name|upper }}_PADDING_Y,
{{ name|upper }}_PADDING_X,
{{ name|upper }}_STRIDE_Y,
{{ name|upper }}_STRIDE_X,
{{ name|upper }}_KERNEL_HEIGHT,
{{ name|upper }}_KERNEL_WIDTH,
{{ name|upper }}_POOLING_TYPE,
{{ name|upper }}_ACTIVATION>
({{in_name[0]}}, {{out_name[0]}});
{% endfilter %}
/**
******************************************************************************
* @file stm32f7xx.h
* @author MCD Application Team
* @brief CMSIS STM32F7xx Device Peripheral Access Layer Header File.
*
* The file is the unique include file that the application programmer
* is using in the C source code, usually in main.c. This file contains:
* - Configuration section that allows to select:
* - The STM32F7xx device used in the target application
* - To use or not the peripheral's drivers in application code(i.e.
* code will be based on direct access to peripheral's registers
* rather than drivers API), this option is controlled by
* "#define USE_HAL_DRIVER"
*
******************************************************************************
* @attention
*
* Copyright (c) 2016 STMicroelectronics.
* All rights reserved.
*
* This software is licensed under terms that can be found in the LICENSE file
* in the root directory of this software component.
* If no LICENSE file comes with this software, it is provided AS-IS.
*
******************************************************************************
*/
/** @addtogroup CMSIS
* @{
*/
/** @addtogroup stm32f7xx
* @{
*/
#ifndef __STM32F7xx_H
#define __STM32F7xx_H
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/** @addtogroup Library_configuration_section
* @{
*/
/**
* @brief STM32 Family
*/
#if !defined (STM32F7)
#define STM32F7
#endif /* STM32F7 */
/* Uncomment the line below according to the target STM32 device used in your
application
*/
#if !defined (STM32F756xx) && !defined (STM32F746xx) && !defined (STM32F745xx) && !defined (STM32F765xx) && \
!defined (STM32F767xx) && !defined (STM32F769xx) && !defined (STM32F777xx) && !defined (STM32F779xx) && \
!defined (STM32F722xx) && !defined (STM32F723xx) && !defined (STM32F732xx) && !defined (STM32F733xx) && \
!defined (STM32F730xx) && !defined (STM32F750xx)
/* #define STM32F756xx */ /*!< STM32F756VG, STM32F756ZG, STM32F756ZG, STM32F756IG, STM32F756BG,
STM32F756NG Devices */
/* #define STM32F746xx */ /*!< STM32F746VE, STM32F746VG, STM32F746ZE, STM32F746ZG, STM32F746IE, STM32F746IG,
STM32F746BE, STM32F746BG, STM32F746NE, STM32F746NG Devices */
/* #define STM32F745xx */ /*!< STM32F745VE, STM32F745VG, STM32F745ZG, STM32F745ZE, STM32F745IE, STM32F745IG Devices */
/* #define STM32F765xx */ /*!< STM32F765BI, STM32F765BG, STM32F765NI, STM32F765NG, STM32F765II, STM32F765IG,
STM32F765ZI, STM32F765ZG, STM32F765VI, STM32F765VG Devices */
/* #define STM32F767xx */ /*!< STM32F767BG, STM32F767BI, STM32F767IG, STM32F767II, STM32F767NG, STM32F767NI,
STM32F767VG, STM32F767VI, STM32F767ZG, STM32F767ZI Devices */
/* #define STM32F769xx */ /*!< STM32F769AG, STM32F769AI, STM32F769BG, STM32F769BI, STM32F769IG, STM32F769II,
STM32F769NG, STM32F769NI, STM32F768AI Devices */
/* #define STM32F777xx */ /*!< STM32F777VI, STM32F777ZI, STM32F777II, STM32F777BI, STM32F777NI Devices */
/* #define STM32F779xx */ /*!< STM32F779II, STM32F779BI, STM32F779NI, STM32F779AI, STM32F778AI Devices */
/* #define STM32F722xx */ /*!< STM32F722IE, STM32F722ZE, STM32F722VE, STM32F722RE, STM32F722IC, STM32F722ZC,
STM32F722VC, STM32F722RC Devices */
/* #define STM32F723xx */ /*!< STM32F723IE, STM32F723ZE, STM32F723VE, STM32F723IC, STM32F723ZC, STM32F723VC Devices */
/* #define STM32F732xx */ /*!< STM32F732IE, STM32F732ZE, STM32F732VE, STM32F732RE Devices */
/* #define STM32F733xx */ /*!< STM32F733IE, STM32F733ZE, STM32F733VE Devices */
/* #define STM32F730xx */ /*!< STM32F730R, STM32F730V, STM32F730Z, STM32F730I Devices */
/* #define STM32F750xx */ /*!< STM32F750V, STM32F750Z, STM32F750N Devices */
#endif
/* Tip: To avoid modifying this file each time you need to switch between these
devices, you can define the device in your toolchain compiler preprocessor.
*/
#if !defined (USE_HAL_DRIVER)
/**
* @brief Comment the line below if you will not use the peripherals drivers.
In this case, these drivers will not be included and the application code will
be based on direct access to peripherals registers
*/
/*#define USE_HAL_DRIVER */
#endif /* USE_HAL_DRIVER */
/**
* @brief CMSIS Device version number V1.2.9
*/
#define __STM32F7_CMSIS_VERSION_MAIN (0x01) /*!< [31:24] main version */
#define __STM32F7_CMSIS_VERSION_SUB1 (0x02) /*!< [23:16] sub1 version */
#define __STM32F7_CMSIS_VERSION_SUB2 (0x09) /*!< [15:8] sub2 version */
#define __STM32F7_CMSIS_VERSION_RC (0x00) /*!< [7:0] release candidate */
#define __STM32F7_CMSIS_VERSION ((__STM32F7_CMSIS_VERSION_MAIN << 24)\
|(__STM32F7_CMSIS_VERSION_SUB1 << 16)\
|(__STM32F7_CMSIS_VERSION_SUB2 << 8 )\
|(__STM32F7_CMSIS_VERSION_RC))
/**
* @}
*/
/** @addtogroup Device_Included
* @{
*/
#if defined(STM32F722xx)
#include "stm32f722xx.h"
#elif defined(STM32F723xx)
#include "stm32f723xx.h"
#elif defined(STM32F732xx)
#include "stm32f732xx.h"
#elif defined(STM32F733xx)
#include "stm32f733xx.h"
#elif defined(STM32F756xx)
#include "stm32f756xx.h"
#elif defined(STM32F746xx)
#include "stm32f746xx.h"
#elif defined(STM32F745xx)
#include "stm32f745xx.h"
#elif defined(STM32F765xx)
#include "stm32f765xx.h"
#elif defined(STM32F767xx)
#include "stm32f767xx.h"
#elif defined(STM32F769xx)
#include "stm32f769xx.h"
#elif defined(STM32F777xx)
#include "stm32f777xx.h"
#elif defined(STM32F779xx)
#include "stm32f779xx.h"
#elif defined(STM32F730xx)
#include "stm32f730xx.h"
#elif defined(STM32F750xx)
#include "stm32f750xx.h"
#else
#error "Please select first the target STM32F7xx device used in your application (in stm32f7xx.h file)"
#endif
/**
* @}
*/
/** @addtogroup Exported_types
* @{
*/
/* Two-state status shared by the FlagStatus and ITStatus type names:
 * RESET is 0 and SET is defined as !RESET, i.e. 1. */
typedef enum
{
RESET = 0U,
SET = !RESET
} FlagStatus, ITStatus;
/* Enable/disable switch: DISABLE is 0 and ENABLE is defined as !DISABLE, i.e. 1. */
typedef enum
{
DISABLE = 0U,
ENABLE = !DISABLE
} FunctionalState;
/* Evaluates to non-zero when STATE equals DISABLE or ENABLE.
 * STATE is expanded twice, so it should be free of side effects. */
#define IS_FUNCTIONAL_STATE(STATE) (((STATE) == DISABLE) || ((STATE) == ENABLE))
/* Operation result: SUCCESS is 0 and ERROR is defined as !SUCCESS, i.e. 1.
 * A value of this type used directly as a condition is therefore true on ERROR. */
typedef enum
{
SUCCESS = 0U,
ERROR = !SUCCESS
} ErrorStatus;
/**
* @}
*/
/** @addtogroup Exported_macro
* @{
*/
/* Plain (non-atomic) read-modify-write helpers. REG must be an lvalue. */
/* Sets in REG every bit that is set in BIT. */
#define SET_BIT(REG, BIT) ((REG) |= (BIT))
/* Clears in REG every bit that is set in BIT. */
#define CLEAR_BIT(REG, BIT) ((REG) &= ~(BIT))
/* Yields REG masked with BIT; non-zero when any selected bit is set. */
#define READ_BIT(REG, BIT) ((REG) & (BIT))
/* Writes zero to REG. */
#define CLEAR_REG(REG) ((REG) = (0x0))
/* Writes VAL to REG. */
#define WRITE_REG(REG, VAL) ((REG) = (VAL))
/* Yields the current value of REG. */
#define READ_REG(REG) ((REG))
/* Reads REG, clears the CLEARMASK bits, ORs in SETMASK and writes the result back in one WRITE_REG. */
#define MODIFY_REG(REG, CLEARMASK, SETMASK) WRITE_REG((REG), (((READ_REG(REG)) & (~(CLEARMASK))) | (SETMASK)))
/* Applies __RBIT then __CLZ to VAL. NOTE(review): with the CMSIS bit-reverse and
 * count-leading-zeros intrinsics this is the index of the lowest set bit -- confirm. */
#define POSITION_VAL(VAL) (__CLZ(__RBIT(VAL)))
/* Use of CMSIS compiler intrinsics for register exclusive access */
/* Every macro below follows the same pattern: load REG with __LDREXW/__LDREXH,
 * compute the new value, and retry until the matching __STREXW/__STREXH
 * returns 0U. The whole sequence is wrapped in do { ... } while(0) so it can
 * be used as a single statement.
 * Caution: each macro declares a local variable named `val`; an argument
 * expression that mentions a caller variable also called `val` would bind to
 * the macro's local instead. BIT/CLEARMSK/SETMASK are re-evaluated on every
 * retry, so they should be free of side effects. */
/* Atomic 32-bit register access macro to set one or several bits */
#define ATOMIC_SET_BIT(REG, BIT) \
do { \
uint32_t val; \
do { \
val = __LDREXW((__IO uint32_t *)&(REG)) | (BIT); \
} while ((__STREXW(val,(__IO uint32_t *)&(REG))) != 0U); \
} while(0)
/* Atomic 32-bit register access macro to clear one or several bits */
#define ATOMIC_CLEAR_BIT(REG, BIT) \
do { \
uint32_t val; \
do { \
val = __LDREXW((__IO uint32_t *)&(REG)) & ~(BIT); \
} while ((__STREXW(val,(__IO uint32_t *)&(REG))) != 0U); \
} while(0)
/* Atomic 32-bit register access macro to clear and set one or several bits */
/* The CLEARMSK bits are cleared first, then the SETMASK bits are ORed in. */
#define ATOMIC_MODIFY_REG(REG, CLEARMSK, SETMASK) \
do { \
uint32_t val; \
do { \
val = (__LDREXW((__IO uint32_t *)&(REG)) & ~(CLEARMSK)) | (SETMASK); \
} while ((__STREXW(val,(__IO uint32_t *)&(REG))) != 0U); \
} while(0)
/* Atomic 16-bit register access macro to set one or several bits */
#define ATOMIC_SETH_BIT(REG, BIT) \
do { \
uint16_t val; \
do { \
val = __LDREXH((__IO uint16_t *)&(REG)) | (BIT); \
} while ((__STREXH(val,(__IO uint16_t *)&(REG))) != 0U); \
} while(0)
/* Atomic 16-bit register access macro to clear one or several bits */
#define ATOMIC_CLEARH_BIT(REG, BIT) \
do { \
uint16_t val; \
do { \
val = __LDREXH((__IO uint16_t *)&(REG)) & ~(BIT); \
} while ((__STREXH(val,(__IO uint16_t *)&(REG))) != 0U); \
} while(0)
/* Atomic 16-bit register access macro to clear and set one or several bits */
/* The CLEARMSK bits are cleared first, then the SETMASK bits are ORed in. */
#define ATOMIC_MODIFYH_REG(REG, CLEARMSK, SETMASK) \
do { \
uint16_t val; \
do { \
val = (__LDREXH((__IO uint16_t *)&(REG)) & ~(CLEARMSK)) | (SETMASK); \
} while ((__STREXH(val,(__IO uint16_t *)&(REG))) != 0U); \
} while(0)
/**
* @}
*/
#ifdef USE_HAL_DRIVER
#include "stm32f7xx_hal.h"
#endif /* USE_HAL_DRIVER */
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* __STM32F7xx_H */
/**
* @}
*/
/**
* @}
*/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment