diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp index 90b22c5fa8526115122fef9a0f58322af513b302..aa533786d3ce5b6f5cd501b6ba74b1be2823d407 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp @@ -25,7 +25,7 @@ void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); -#pragma omp parallel for if (inputLenght > 1024) +//#pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = input[i] > 0 ? input[i] : 0; } diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp index 96303312aae067c6955c96331f7cd7d959de53a7..a53650942540e6368855ffe19e2f7f651ab5b6bc 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp @@ -25,7 +25,7 @@ void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); -#pragma omp parallel for if (inputLenght > 1024) +//#pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = static_cast<O>(1.0) / (static_cast<O>(1.0) + std::exp(-input[i])); } diff --git a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp index 3012aae9e4a8a587efde0b8221b8c55c4d832345..9e57b6dfcb0da322f5b21944fb10ec7a10cd0ab8 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp @@ -25,7 +25,7 @@ void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); -#pragma omp parallel for if (inputLenght > 1024) +//#pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = std::tanh(input[i]); } diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp index 292a3b56682889051fd48b53382e5030f4e1ee50..729aff2452b46f00eb6d3e0b558c0b3d58ea2f0e 100644 --- a/src/operator/DivImpl.cpp +++ b/src/operator/DivImpl.cpp @@ -91,16 +91,10 @@ void Aidge::DivImpl_cpu::forward() { std::size_t contiguousIdx = nbDims - 1; for (; contiguousIdx+1 > 0; --contiguousIdx) { if (dims0[contiguousIdx] != dims1[contiguousIdx]) { - if (contiguousIdx == (nbDims -1)) { - if (dims0[contiguousIdx] == 1) { - while ((dims0[contiguousIdx] == 1) && (contiguousIdx+1 > 0)) { - --contiguousIdx; - } - } - else { - while ((dims1[contiguousIdx] == 1) && (contiguousIdx+1 > 0)) { - --contiguousIdx; - } + if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1 + const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1; + while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) { + --contiguousIdx; } } break; diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index bc4a7a7cab91049c623e9a9e95ee63367da00722..995245907c8c87b0367c7edfa4493bd6b7faf660 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -57,9 +57,10 @@ void Aidge::FCImpl_cpu::forward() const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); // Call kernel + const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1; kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), - input0.dims()[0], - input0.size() / input0.dims()[0], + batchSize, + input0.size() / batchSize, input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), getCPUPtr(mOp.getRawOutput(0))); }