diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
index 90b22c5fa8526115122fef9a0f58322af513b302..aa533786d3ce5b6f5cd501b6ba74b1be2823d407 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
@@ -25,7 +25,7 @@ void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
-#pragma omp parallel for if (inputLenght > 1024)
+//#pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = input[i] > 0 ? input[i] : 0;
     }
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
index 96303312aae067c6955c96331f7cd7d959de53a7..a53650942540e6368855ffe19e2f7f651ab5b6bc 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
@@ -25,7 +25,7 @@ void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght,
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
-#pragma omp parallel for if (inputLenght > 1024)
+//#pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = static_cast<O>(1.0) / (static_cast<O>(1.0) + std::exp(-input[i]));
     }
diff --git a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp
index 3012aae9e4a8a587efde0b8221b8c55c4d832345..9e57b6dfcb0da322f5b21944fb10ec7a10cd0ab8 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp
@@ -25,7 +25,7 @@ void TanhImpl_cpu_forward_kernel(std::size_t inputLenght,
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
-#pragma omp parallel for if (inputLenght > 1024)
+//#pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = std::tanh(input[i]);
     }
diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp
index 292a3b56682889051fd48b53382e5030f4e1ee50..729aff2452b46f00eb6d3e0b558c0b3d58ea2f0e 100644
--- a/src/operator/DivImpl.cpp
+++ b/src/operator/DivImpl.cpp
@@ -91,16 +91,10 @@ void Aidge::DivImpl_cpu::forward() {
     std::size_t contiguousIdx = nbDims - 1;
     for (; contiguousIdx+1 > 0; --contiguousIdx) {
         if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
-            if (contiguousIdx == (nbDims -1)) {
-                if (dims0[contiguousIdx] == 1) {
-                    while ((dims0[contiguousIdx] == 1) && (contiguousIdx+1 > 0)) {
-                        --contiguousIdx;
-                    }
-                }
-                else {
-                    while ((dims1[contiguousIdx] == 1) && (contiguousIdx+1 > 0)) {
-                        --contiguousIdx;
-                    }
+            if (contiguousIdx == (nbDims -1)) { // the trailing dimensions of one of the input Tensors are of size 1
+                const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
+                while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
+                    --contiguousIdx;
                 }
             }
             break;
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index bc4a7a7cab91049c623e9a9e95ee63367da00722..995245907c8c87b0367c7edfa4493bd6b7faf660 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -57,9 +57,10 @@ void Aidge::FCImpl_cpu::forward()
     const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
 
     // Call kernel
+    const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
     kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
-        input0.dims()[0],
-        input0.size() / input0.dims()[0],
+        batchSize,
+        input0.size() / batchSize,
         input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
         getCPUPtr(mOp.getRawOutput(0)));
 }