Commit fc36e10e authored by Houssem ROUIS, committed by Maxence Naud

remove matmul attrs and update kernel

parent c2eb9b34
Included in 2 merge requests: !50 "version 0.2.0" and !34 "Matmul rework". This commit is part of merge request !34.
Hunk 1: kernel registry declarations (@@ -27,12 +27,12 @@ namespace Aidge)

Before:

// compute kernel registry for forward and backward
class MatMulImplForward_cpu
    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>,
                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
                              const void *, const void *, void *)> {};
class MatMulImplBackward_cpu
    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>,
                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
                              const void *, const void *, void *)> {};

class MatMulImpl_cpu : public OperatorImpl {

After:

// compute kernel registry for forward and backward
class MatMulImplForward_cpu
    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>,
                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                              const void *, const void *, void *)> {};
class MatMulImplBackward_cpu
    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>,
                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                              const void *, const void *, void *)> {};

class MatMulImpl_cpu : public OperatorImpl {
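The registration key shrinks from an {input, weight, output} DataType triple to an {input, output} pair, and the registered function now receives the two input shape vectors instead of the operator's static attributes and flattened sizes. Since both inputs share the template parameter I in the new kernel, one DataType entry covers them. For readers unfamiliar with the Registrable/Registrar pattern, the following stand-alone sketch illustrates the idea behind a tuple-keyed kernel registry; it uses a plain std::map and local stand-in types, and is not Aidge's actual implementation:

#include <cstddef>
#include <map>
#include <tuple>
#include <vector>

// Illustrative stand-ins for the Aidge types involved (not Aidge API).
enum class DataType { Float32, Float64, Int32 };
using DimSize_t = std::size_t;

// Matches the new kernel signature: two shape vectors plus raw data pointers.
using MatMulKernel = void (*)(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                              const void*, const void*, void*);

// Registry keyed on the {input, output} DataType pair.
using KernelKey = std::tuple<DataType, DataType>;
std::map<KernelKey, MatMulKernel>& kernelRegistry() {
    static std::map<KernelKey, MatMulKernel> registry;
    return registry;
}

// Registration stores a kernel under its key; a forward() implementation would
// then fetch it with kernelRegistry().at({inputType, outputType}) and call it.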
Hunk 2: forward kernel implementation (@@ -19,38 +19,54 @@)

Before:

namespace Aidge {

template <class I, class W, class O>
void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
                                   const void* input_, const void* weights_, void* output_) {
    // FIXME: missing MatMul parameters as arguments
    const I* input = static_cast<const I*>(input_);
    const W* weights = static_cast<const W*>(weights_);
    O* output = static_cast<O*>(output_);

    std::fill(output, output + (batchSize * std::get<0>(attrs)), O(0));

    for (std::size_t batch = 0; batch < batchSize; ++batch) {
        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
                                                                        input + (batch + 1)*oneInputSize,
                                                                        weights + out*oneInputSize,
                                                                        output[out + batch*std::get<0>(attrs)]);
        }
    }
}

namespace {
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
        {DataType::Float32, DataType::Float32, DataType::Float32},
        Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
        {DataType::Int32, DataType::Int32, DataType::Int32},
        Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
        {DataType::Float64, DataType::Float64, DataType::Float64},
        Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>);
}  // namespace
}  // namespace Aidge

After:

namespace Aidge {

template <class I, class O>
void MatMulImpl_cpu_forward_kernel(const std::vector<DimSize_t>& input1Dims, const std::vector<DimSize_t>& input2Dims,
                                   const void* input1_, const void* input2_, void* output_) {
    // FIXME: missing MatMul parameters as arguments
    const I* input1 = static_cast<const I*>(input1_);
    const I* input2 = static_cast<const I*>(input2_);
    O* output = static_cast<O*>(output_);

    size_t secondToLastIdx1 = input1Dims.size() > 1 ? input1Dims.size() - 2 : 0;
    size_t secondToLastIdx2 = input2Dims.size() > 1 ? input2Dims.size() - 2 : 0;

    // Checking if matrix dimensions are compatible for multiplication
    assert(input1Dims.back() == input2Dims[secondToLastIdx2] && "Matrix dimensions are not compatible for multiplication");

    // Extracting dimensions
    size_t rows1 = 1, cols1 = 1, cols2 = 1;
    // For input1
    for (size_t i = 0; i < input1Dims.size() - 1; ++i) {
        rows1 *= input1Dims[i];
    }
    cols1 = input1Dims.back();
    // For input2
    for (size_t i = 1; i < input2Dims.size(); ++i) {
        cols2 *= input2Dims[i];
    }

    // Multiplication
    for (size_t i = 0; i < rows1; ++i) {
        for (size_t j = 0; j < cols2; ++j) {
            float sum = 0.0;
            for (size_t k = 0; k < cols1; ++k) {
                sum += input1[i * cols1 + k] * input2[k * cols2 + j];
            }
            output[i * cols2 + j] = sum;
        }
    }
}

namespace {
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
        {DataType::Float32, DataType::Float32},
        Aidge::MatMulImpl_cpu_forward_kernel<float, float>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
        {DataType::Int32, DataType::Int32},
        Aidge::MatMulImpl_cpu_forward_kernel<int, int>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
        {DataType::Float64, DataType::Float64},
        Aidge::MatMulImpl_cpu_forward_kernel<double, double>);
}  // namespace
}  // namespace Aidge
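To make the new flattening scheme concrete, the following stand-alone snippet (not Aidge code) rehearses the same index arithmetic as the kernel above on a 2x3 by 3x2 multiply: rows1 is the product of every input1 dimension except the last, cols1 is input1's last dimension, and cols2 is the product of every input2 dimension except the first. The committed kernel accumulates into a float sum regardless of I and O; the sketch sticks to float data so the arithmetic matches exactly.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // Row-major data: input1 has shape {2, 3}, input2 has shape {3, 2}.
    std::vector<std::size_t> dims1{2, 3}, dims2{3, 2};
    std::vector<float> in1{1, 2, 3,
                           4, 5, 6};
    std::vector<float> in2{ 7,  8,
                            9, 10,
                           11, 12};

    // rows1: product of every input1 dimension except the last.
    std::size_t rows1 = 1;
    for (std::size_t i = 0; i + 1 < dims1.size(); ++i) rows1 *= dims1[i];
    const std::size_t cols1 = dims1.back();   // contracted dimension
    // cols2: product of every input2 dimension except the first.
    std::size_t cols2 = 1;
    for (std::size_t i = 1; i < dims2.size(); ++i) cols2 *= dims2[i];
    assert(cols1 == dims2.front() && "shapes not compatible");

    // Same triple loop as the kernel: output[i][j] = sum_k in1[i][k] * in2[k][j].
    std::vector<float> out(rows1 * cols2, 0.f);
    for (std::size_t i = 0; i < rows1; ++i)
        for (std::size_t j = 0; j < cols2; ++j)
            for (std::size_t k = 0; k < cols1; ++k)
                out[i * cols2 + j] += in1[i * cols1 + k] * in2[k * cols2 + j];

    // Prints: 58 64 / 139 154
    for (std::size_t i = 0; i < rows1; ++i) {
        for (std::size_t j = 0; j < cols2; ++j) std::cout << out[i * cols2 + j] << ' ';
        std::cout << '\n';
    }
}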
Hunk 3: MatMulImpl_cpu::forward() (@@ -30,24 +30,12 @@)

Before:

    // Find the correct kernel type
    auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
        {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
         std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});

    // Call kernel
    // if (mOp.getInput(0)->nbDims() == 4) {
    //     kernelFunc(
    //         mOp.getStaticAttributes(),
    //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->template dims<4>(),
    //         mOp.getInput(0))->getImpl()->rawPtr(),
    //         mOp.mInputs[1]->getImpl()->rawPtr(),
    //         mOp.mInputs[2]->getImpl()->rawPtr(),
    //         getCPUPtr(mOp.getRawOutput(0));
    //     }
    // else
    kernelFunc(
        dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(),
        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size() / std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
        getCPUPtr(mOp.getRawInput(0)),
        getCPUPtr(mOp.getRawInput(1)),
        getCPUPtr(mOp.getRawOutput(0)));

After:

    // Find the correct kernel type
    auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
        {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});

    kernelFunc(
        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims(),
        getCPUPtr(mOp.getRawInput(0)),
        getCPUPtr(mOp.getRawInput(1)),
        getCPUPtr(mOp.getRawOutput(0)));
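forward() no longer extracts a batch size and flattened input size itself; it simply hands both dims() vectors to the kernel, along with the raw CPU pointers. As a hypothetical illustration (this helper is not part of Aidge), the number of output elements the kernel will write can be derived from those two vectors in the same way the kernel flattens them, as rows1 * cols2:

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Hypothetical helper: output element count implied by the two dims vectors
// that forward() now passes to the kernel. Assumes both vectors are non-empty.
std::size_t matmulOutputSize(const std::vector<std::size_t>& dims1,
                             const std::vector<std::size_t>& dims2) {
    const std::size_t rows1 = std::accumulate(dims1.begin(), dims1.end() - 1,
                                              std::size_t{1}, std::multiplies<>());
    const std::size_t cols2 = std::accumulate(dims2.begin() + 1, dims2.end(),
                                              std::size_t{1}, std::multiplies<>());
    return rows1 * cols2;
}

// Example: dims1 = {2, 2, 3} and dims2 = {3} give rows1 = 4 and cols2 = 1,
// i.e. 4 output elements, which matches the Array2D<float,2,2> expected output
// of the "3D Tensor by 1D Tensor" test below.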
Hunk 4: MatMul forward unit test (@@ -20,92 +20,126 @@)

Before: a single TEST_CASE built MatMul(75, 5, "mymatmul") with an Array2D<int, 5, 75> weight tensor bound to input 1 (each of the five rows repeats the sequence 1..15 five times), set the data type to Int32, and checked two sections, "2D input" (an Array2D<int, 2, 75> holding the values 0..149) and "4D input" (an Array4D<int, 2, 3, 5, 5> holding the same values), against the expected output Array2D<int, 2, 5> {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}} using REQUIRE(*(op->getOutput(0)) == *myOutput).

After:

using namespace Aidge;

TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
    SECTION("2D Tensors") {
        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
            {
                {0.16672266, 0.39773488},
                {0.83746278, 0.54205710}
            }
        });
        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{
            {
                {0.50658345, 0.04777747},
                {0.22279310, 0.41348755}
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
            {
                {0.17307153, 0.17242400},
                {0.54501140, 0.26414573}
            }
        });

        std::shared_ptr<Node> myMatMul = MatMul();
        auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
        op->associateInput(0, input_1);
        op->associateInput(1, input_2);
        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        op->computeOutputDims();
        myMatMul->forward();

        expectedOutput->print();
        op->getOutput(0)->print();
        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
        }
    }

    SECTION("3D Tensor by 1D Tensor") {
        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
            {
                {{0.82786506, 0.19047028, 0.62954658},
                 {0.63160968, 0.12468684, 0.49015969}},

                {{0.49215794, 0.42231840, 0.02699018},
                 {0.66403216, 0.94622904, 0.42048711}}
            }
        });
        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{
            {0.82458717, 0.88598752, 0.78737932}
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
            {
                {1.34709311, 1.01722980},
                {0.80124742, 1.71698236}
            }
        });

        std::shared_ptr<Node> myMatMul = MatMul();
        auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
        op->associateInput(0, input_1);
        op->associateInput(1, input_2);
        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        op->computeOutputDims();
        myMatMul->forward();

        expectedOutput->print();
        op->getOutput(0)->print();
        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
        }
    }

    SECTION("3D Tensor by 2D Tensor") {
        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,1,2,3> {
            {
                {
                    {0.53427607, 0.69181818, 0.30088913},
                    {0.20866227, 0.67821276, 0.25695610}
                }
            }
        });
        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,3,4>{
            {
                {0.03158629, 0.21031839, 0.95692378, 0.05287921},
                {0.66182911, 0.91662365, 0.07928377, 0.86983263},
                {0.12386280, 0.63736272, 0.15963674, 0.465079722}
            }
        });
        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,1,2,4> {
            {
                {
                    {0.51201022, 0.93828046, 0.61414438, 0.76995558},
                    {0.48727912, 0.82932562, 0.29446477, 0.72047055}
                }
            }
        });

        std::shared_ptr<Node> myMatMul = MatMul();
        auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
        op->associateInput(0, input_1);
        op->associateInput(1, input_2);
        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        op->computeOutputDims();
        myMatMul->forward();

        expectedOutput->print();
        op->getOutput(0)->print();
        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
        }
    }
}
\ No newline at end of file
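As a quick plausibility check on the new reference values (not part of the commit), the first entry of the "2D Tensors" expected output is just a two-term dot product of input_1's first row with input_2's first column:

#include <cassert>
#include <cmath>

int main() {
    // Row 0 of input_1 dotted with column 0 of input_2 (values from the 2D test).
    const double v = 0.16672266 * 0.50658345 + 0.39773488 * 0.22279310;
    assert(std::abs(v - 0.17307153) < 1e-6);  // matches expectedOutput[0][0]
    return 0;
}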