Skip to content
Snippets Groups Projects
Commit c08a3fa7 authored by Olivier BICHLER
Browse files

Fixed Windows build

parent b57e889e
No related branches found
No related tags found
2 merge requests: !166 "Update 0.5.0 -> 0.6.0", !158 "Added OpenMP"
...@@ -79,8 +79,8 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD ...@@ -79,8 +79,8 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (dims[0] * dims[1] > 32) #pragma omp parallel for collapse(2) if (dims[0] * dims[1] > 32)
#endif #endif
for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(dims[0]); ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) { for (int ch = 0; ch < static_cast<int>(dims[1]); ++ch) {
const std::size_t oIndex = (ch + batch * dims[1]) * oxSize * oySize; const std::size_t oIndex = (ch + batch * dims[1]) * oxSize * oySize;
const std::size_t iIndex = (ch + batch * dims[1]) * dims[2] * dims[3]; const std::size_t iIndex = (ch + batch * dims[1]) * dims[2] * dims[3];
......
...@@ -56,8 +56,8 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std ...@@ -56,8 +56,8 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (nbBatch * nbChannels > 32) #pragma omp parallel for collapse(2) if (nbBatch * nbChannels > 32)
#endif #endif
for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (int batch = 0; batch < static_cast<int>(nbBatch); ++batch) {
for (std::size_t ch = 0; ch < nbChannels; ++ch) { for (int ch = 0; ch < static_cast<int>(nbChannels); ++ch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon)); const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon));
......
...@@ -68,8 +68,8 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri ...@@ -68,8 +68,8 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize; const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0); B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal); std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
...@@ -158,8 +158,8 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri ...@@ -158,8 +158,8 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0); B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s; std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s;
...@@ -201,8 +201,8 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri ...@@ -201,8 +201,8 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0); B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s; std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s;
...@@ -226,8 +226,8 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri ...@@ -226,8 +226,8 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * inputDims[1] > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { for (int ch = 0; ch < static_cast<int>(inputDims[1]); ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s; const std::size_t oIndex = (ch + batch*inputDims[1]) * outChannels_s;
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
......
...@@ -62,8 +62,8 @@ void ConvImpl1D_cpu_forward_kernel(const array<DimSize_t, 1> &strideDim, ...@@ -62,8 +62,8 @@ void ConvImpl1D_cpu_forward_kernel(const array<DimSize_t, 1> &strideDim,
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { for (int outCh = 0; outCh < static_cast<int>(outChannels); ++outCh) {
const std::size_t oIndex = (outCh + batch * outChannels) * oxSize; const std::size_t oIndex = (outCh + batch * outChannels) * oxSize;
// If bias = nullptr, set B(0) // If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0); B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
...@@ -484,8 +484,8 @@ void ConvImpl2D_cpu_forward_kernel(const array<DimSize_t, 2> &strideDims, ...@@ -484,8 +484,8 @@ void ConvImpl2D_cpu_forward_kernel(const array<DimSize_t, 2> &strideDims,
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { for (int outCh = 0; outCh < static_cast<int>(outChannels); ++outCh) {
std::size_t oIndex = (outCh + batch*inputDims[1]) * outChannels_s; std::size_t oIndex = (outCh + batch*inputDims[1]) * outChannels_s;
// If bias = nullptr, set B(0) // If bias = nullptr, set B(0)
...@@ -573,8 +573,8 @@ void ConvImpl2D_cpu_forward_kernel(const array<DimSize_t, 2> &strideDims, ...@@ -573,8 +573,8 @@ void ConvImpl2D_cpu_forward_kernel(const array<DimSize_t, 2> &strideDims,
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { for (int outCh = 0; outCh < static_cast<int>(outChannels); ++outCh) {
std::size_t oIndex = (outCh + batch*inputDims[1]) * outChannels_s; std::size_t oIndex = (outCh + batch*inputDims[1]) * outChannels_s;
// If bias = nullptr, set B(0) // If bias = nullptr, set B(0)
...@@ -609,8 +609,8 @@ void ConvImpl2D_cpu_forward_kernel(const array<DimSize_t, 2> &strideDims, ...@@ -609,8 +609,8 @@ void ConvImpl2D_cpu_forward_kernel(const array<DimSize_t, 2> &strideDims,
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32) #pragma omp parallel for collapse(2) if (inputDims[0] * outChannels > 32)
#endif #endif
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(inputDims[0]); ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { for (int outCh = 0; outCh < static_cast<int>(outChannels); ++outCh) {
std::size_t oIndex = (outCh + batch*inputDims[1]) * outChannels_s; std::size_t oIndex = (outCh + batch*inputDims[1]) * outChannels_s;
// If bias = nullptr, set B(0) // If bias = nullptr, set B(0)
......
...@@ -77,8 +77,8 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel(const std::shared_ptr<Tensor>& ...@@ -77,8 +77,8 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel(const std::shared_ptr<Tensor>&
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (dims[0] * dims[1] > 32) #pragma omp parallel for collapse(2) if (dims[0] * dims[1] > 32)
#endif #endif
for (DimSize_t batch = 0; batch < dims[0]; ++batch) { for (int batch = 0; batch < static_cast<int>(dims[0]); ++batch) {
for (DimSize_t channel = 0; channel < dims[1]; ++channel) { for (int channel = 0; channel < static_cast<int>(dims[1]); ++channel) {
const I *filter_start = std::next( const I *filter_start = std::next(
input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems)); input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
output[batch * out_batch_nb_elems + channel] = castFromFloat<O>(stableMean<I>(filter_start, in_channel_nb_elems)); output[batch * out_batch_nb_elems + channel] = castFromFloat<O>(stableMean<I>(filter_start, in_channel_nb_elems));
......
...@@ -69,8 +69,8 @@ void MaxPoolingImpl2D_cpu_forward_kernel( ...@@ -69,8 +69,8 @@ void MaxPoolingImpl2D_cpu_forward_kernel(
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (dims[0] * dims[1] > 32) #pragma omp parallel for collapse(2) if (dims[0] * dims[1] > 32)
#endif #endif
for (std::size_t batch = 0; batch < dims[0]; ++batch){ for (int batch = 0; batch < static_cast<int>(dims[0]); ++batch){
for (std::size_t channel = 0; channel < dims[1]; ++channel){ for (int channel = 0; channel < static_cast<int>(dims[1]); ++channel){
auto batchChannelIndex = (channel + batch * dims[1]); auto batchChannelIndex = (channel + batch * dims[1]);
const std::size_t outputBaseIndex = batchChannelIndex * outXSize * outYSize; const std::size_t outputBaseIndex = batchChannelIndex * outXSize * outYSize;
const std::size_t inputBaseIndex = batchChannelIndex * dims[2] * dims[3]; const std::size_t inputBaseIndex = batchChannelIndex * dims[2] * dims[3];
......
...@@ -40,8 +40,8 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi ...@@ -40,8 +40,8 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for collapse(2) if (preAxisElems * postAxisElems > 32) #pragma omp parallel for collapse(2) if (preAxisElems * postAxisElems > 32)
#endif #endif
for (std::size_t i = 0; i < preAxisElems; ++i) { for (int i = 0; i < static_cast<int>(preAxisElems); ++i) {
for (std::size_t j = 0; j < postAxisElems; ++j) { for (int j = 0; j < static_cast<int>(postAxisElems); ++j) {
I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j]; I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j];
for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) { for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) {
std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment