Skip to content
Snippets Groups Projects
Commit 53e9162f authored by Maxence Naud
Browse files

Merge branch 'scheduling' into 'dev'

Improved scheduling

See merge request !45
parents f805a9af cd558133
No related branches found
No related tags found
2 merge requests: !50 "version 0.2.0", !45 "Improved scheduling"
Pipeline #42701 passed
Showing
with 34 additions and 52 deletions
...@@ -40,7 +40,7 @@ public: ...@@ -40,7 +40,7 @@ public:
return std::make_unique<AddImpl_cpu>(op); return std::make_unique<AddImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; Elts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -44,7 +44,7 @@ public: ...@@ -44,7 +44,7 @@ public:
return std::make_unique<AvgPoolingImpl2D_cpu>(op); return std::make_unique<AvgPoolingImpl2D_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
return std::make_unique<BatchNormImpl2D_cpu>(op); return std::make_unique<BatchNormImpl2D_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -48,18 +48,6 @@ public: ...@@ -48,18 +48,6 @@ public:
} }
public: public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward() override; void forward() override;
void backward() override; void backward() override;
......
...@@ -46,7 +46,7 @@ public: ...@@ -46,7 +46,7 @@ public:
return std::make_unique<ConvDepthWiseImpl2D_cpu>(op); return std::make_unique<ConvDepthWiseImpl2D_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -47,7 +47,7 @@ class ConvImpl2D_cpu : public OperatorImpl { ...@@ -47,7 +47,7 @@ class ConvImpl2D_cpu : public OperatorImpl {
} }
public: public:
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -40,7 +40,7 @@ public: ...@@ -40,7 +40,7 @@ public:
return std::make_unique<DivImpl_cpu>(op); return std::make_unique<DivImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override final; void forward() override final;
}; };
......
...@@ -38,7 +38,7 @@ public: ...@@ -38,7 +38,7 @@ public:
return std::make_unique<ErfImpl_cpu>(op); return std::make_unique<ErfImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -38,7 +38,6 @@ public: ...@@ -38,7 +38,6 @@ public:
return std::make_unique<GatherImpl_cpu>(op); return std::make_unique<GatherImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -49,12 +49,12 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel( ...@@ -49,12 +49,12 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel(
for (DimSize_t channel = 0; channel < dims[1]; ++channel) { for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
const I *filter_start = std::next( const I *filter_start = std::next(
input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems)); input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
I sum = 0; I mean = 0;
for (size_t i = 0; i < in_channel_nb_elems; ++i) { for (size_t i = 0; i < in_channel_nb_elems; ++i) {
sum += filter_start[i]; // Single pass numerically stable mean, using the fmaf
mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean);
} }
output[batch * out_batch_nb_elems + channel] = output[batch * out_batch_nb_elems + channel] = mean;
sum / static_cast<I>(in_channel_nb_elems);
} }
} }
} }
......
...@@ -39,7 +39,7 @@ public: ...@@ -39,7 +39,7 @@ public:
return std::make_unique<LeakyReLUImpl_cpu>(op); return std::make_unique<LeakyReLUImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override final; void forward() override final;
......
...@@ -44,7 +44,7 @@ public: ...@@ -44,7 +44,7 @@ public:
return std::make_unique<MaxPoolingImpl2D_cpu>(op); return std::make_unique<MaxPoolingImpl2D_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -29,8 +29,8 @@ public: ...@@ -29,8 +29,8 @@ public:
return std::make_unique<MemorizeImpl_cpu>(op); return std::make_unique<MemorizeImpl_cpu>(op);
} }
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(const Aidge::IOIndex_t outputIdx, Elts_t getRequiredMemory(const Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const override final; const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const override final;
void updateConsummerProducer() override final; void updateConsummerProducer() override final;
void forward() override; void forward() override;
......
...@@ -39,7 +39,7 @@ public: ...@@ -39,7 +39,7 @@ public:
return std::make_unique<MulImpl_cpu>(op); return std::make_unique<MulImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -46,7 +46,7 @@ public: ...@@ -46,7 +46,7 @@ public:
return std::make_unique<PadImpl2D_cpu>(op); return std::make_unique<PadImpl2D_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -39,7 +39,7 @@ public: ...@@ -39,7 +39,7 @@ public:
return std::make_unique<PopImpl_cpu>(op); return std::make_unique<PopImpl_cpu>(op);
} }
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
......
...@@ -39,7 +39,7 @@ public: ...@@ -39,7 +39,7 @@ public:
return std::make_unique<PowImpl_cpu>(op); return std::make_unique<PowImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
void backward() override; void backward() override;
}; };
......
...@@ -41,7 +41,7 @@ public: ...@@ -41,7 +41,7 @@ public:
return std::make_unique<ReLUImpl_cpu>(op); return std::make_unique<ReLUImpl_cpu>(op);
} }
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override final; void forward() override final;
......
...@@ -44,7 +44,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { ...@@ -44,7 +44,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl {
} }
public: public:
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override; void forward() override;
}; };
...@@ -87,7 +86,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { ...@@ -87,7 +86,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl {
// } // }
// public: // public:
// NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
// void forward() override; // void forward() override;
// }; // };
...@@ -100,7 +98,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { ...@@ -100,7 +98,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl {
// } // }
// public: // public:
// NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
// void forward() override; // void forward() override;
// }; // };
...@@ -113,7 +110,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { ...@@ -113,7 +110,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl {
// } // }
// public: // public:
// NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
// void forward() override; // void forward() override;
// }; // };
namespace { namespace {
......
...@@ -47,22 +47,23 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr ...@@ -47,22 +47,23 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr
for (std::size_t post = 0; post < stride_post; ++post) { for (std::size_t post = 0; post < stride_post; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post + post; const std::size_t idx_i = pre * dim_i * stride_post + post;
const std::size_t idx_o = pre * stride_post + post; const std::size_t idx_o = pre * stride_post + post;
output[idx_o] = input[idx_i]; O mean = 0;
for (std::size_t i = 1; i < dim_i; ++i) { for (std::size_t i = 0; i < dim_i; ++i) {
output[idx_o] += input[idx_i + i*stride_post]; // Single pass numerically stable mean, using the fmaf
mean = fmaf(input[idx_i + i*stride_post] - mean, 1.0f/(i+1), mean);
} }
output[idx_o] /= dim_i; output[idx_o] = mean;
} }
} }
} else { } else {
std::size_t outputElements = totalElements; std::size_t outputElements = totalElements;
std::size_t *stride_post = new std::size_t[nb_dims]; auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
stride_post[nb_dims - 1] = 1; stride_post[nb_dims - 1] = 1;
for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) { for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
stride_post[i] = stride_post[i+1]*inputDims[i+1]; stride_post[i] = stride_post[i+1]*inputDims[i+1];
} }
std::size_t *stride_pre = new std::size_t[nb_dims]; auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
stride_pre[0] = 1; stride_pre[0] = 1;
for (std::size_t i = 1; i < nb_dims; ++i) { for (std::size_t i = 1; i < nb_dims; ++i) {
stride_pre[i] = stride_pre[i-1]*inputDims[i-1]; stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
...@@ -80,13 +81,15 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr ...@@ -80,13 +81,15 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr
for (std::size_t post = 0; post < stride_post[a]; ++post) { for (std::size_t post = 0; post < stride_post[a]; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post[a] + post; const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
const std::size_t idx_o = pre * stride_post[a] + post; const std::size_t idx_o = pre * stride_post[a] + post;
outputAccumulation[idx_o] = inputAccumulation[idx_i]; I mean = 0;
for (std::size_t i = 1; i < dim_i; ++i) { for (std::size_t i = 0; i < dim_i; ++i) {
outputAccumulation[idx_o] += inputAccumulation[idx_i + i*stride_post[a]]; // Single pass numerically stable mean, using the fmaf
mean = fmaf(inputAccumulation[idx_i + i*stride_post[a]] - mean, 1.0f/(i+1), mean);
} }
outputAccumulation[idx_o] = mean;
} }
} }
std::for_each(stride_pre+a+1, stride_pre+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
if (inputAccumulation != input) { if (inputAccumulation != input) {
delete[] inputAccumulation; delete[] inputAccumulation;
} }
...@@ -94,14 +97,10 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr ...@@ -94,14 +97,10 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr
} }
// Copy elements from inputAccumulation to output while dividing by divisor // Copy elements from inputAccumulation to output while dividing by divisor
I divisor = totalElements / outputElements; std::copy(inputAccumulation, inputAccumulation + outputElements, output);
std::transform(inputAccumulation, inputAccumulation + outputElements, output,
[divisor](I element) { return element / divisor; });
if (outputAccumulation) { if (outputAccumulation) {
delete[] outputAccumulation; delete[] outputAccumulation;
} }
delete[] stride_post;
delete[] stride_pre;
} }
} }
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment