Commit fc36e10e authored by Houssem ROUIS, committed by Maxence Naud

remove matmul attrs and update kernel

parent c2eb9b34
2 merge requests: !50 version 0.2.0, !34 MatMul rework
@@ -27,12 +27,12 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class MatMulImplForward_cpu
-    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                               const void *, const void *, void *)> {};
 class MatMulImplBackward_cpu
-    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                               const void *, const void *, void *)> {};
 class MatMulImpl_cpu : public OperatorImpl {
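Note: the registry key drops from three data types (input, weight, output) to two (input, output), since the second operand is no longer a separately typed weight and shares the input template type I. A minimal sketch of a lookup against the new key, mirroring the Registrar::create() call in the forward() hunk further down:

    // Sketch of a kernel lookup with the new two-type key
    // ({input data type, output data type}); the returned function now
    // takes both operands' full shape vectors instead of the old
    // (attrs, batchSize, oneInputSize) arguments.
    auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
        {DataType::Float32, DataType::Float32});
    // kernelFunc(input1Dims, input2Dims, input1Ptr, input2Ptr, outputPtr);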
@@ -19,38 +19,54 @@
 namespace Aidge {
-template <class I, class W, class O>
-void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                   const void* input_, const void* weights_, void* output_) {
+template <class I, class O>
+void MatMulImpl_cpu_forward_kernel(const std::vector<DimSize_t>& input1Dims, const std::vector<DimSize_t>& input2Dims,
+                                   const void* input1_, const void* input2_, void* output_) {
-    // FIXME: missing MatMul parameters as arguments
-    const I* input = static_cast<const I*>(input_);
-    const W* weights = static_cast<const W*>(weights_);
+    const I* input1 = static_cast<const I*>(input1_);
+    const I* input2 = static_cast<const I*>(input2_);
     O* output = static_cast<O*>(output_);
+
+    const std::size_t secondToLastIdx2 = input2Dims.size() > 1 ? input2Dims.size() - 2 : 0;
+    // Check that the contracted dimensions are compatible for multiplication
+    assert(input1Dims.back() == input2Dims[secondToLastIdx2] && "Matrix dimensions are not compatible for multiplication");
+    // Extract the flattened dimensions
+    std::size_t rows1 = 1, cols1 = 1, cols2 = 1;
-    std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0));
+    // For input1: fold every dimension but the last into rows1
+    for (std::size_t i = 0; i < input1Dims.size() - 1; ++i) {
+        rows1 *= input1Dims[i];
+    }
+    cols1 = input1Dims.back();
+    // For input2: fold every dimension but the first into cols2
+    for (std::size_t i = 1; i < input2Dims.size(); ++i) {
+        cols2 *= input2Dims[i];
+    }
-    for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
-            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
-                                                                        input + (batch + 1)*oneInputSize,
-                                                                        weights + out*oneInputSize,
-                                                                        output[out + batch*std::get<0>(attrs)]);
-        }
-    }
+    // Naive (rows1 x cols1) * (cols1 x cols2) multiplication
+    for (std::size_t i = 0; i < rows1; ++i) {
+        for (std::size_t j = 0; j < cols2; ++j) {
+            O sum = O(0);  // accumulate in the output type rather than float
+            for (std::size_t k = 0; k < cols1; ++k) {
+                sum += input1[i * cols1 + k] * input2[k * cols2 + j];
+            }
+            output[i * cols2 + j] = sum;
+        }
+    }
 }

 namespace {
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>);
+        {DataType::Float32, DataType::Float32},
+        Aidge::MatMulImpl_cpu_forward_kernel<float, float>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>);
+        {DataType::Int32, DataType::Int32},
+        Aidge::MatMulImpl_cpu_forward_kernel<int, int>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>);
+        {DataType::Float64, DataType::Float64},
+        Aidge::MatMulImpl_cpu_forward_kernel<double, double>);
 }  // namespace
 }  // namespace Aidge
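To make the flattening concrete: the kernel folds every dimension of input1 except the last into rows1 and every dimension of input2 except the first into cols2, then runs a single (rows1 x cols1) * (cols1 x cols2) product. A minimal sketch driving the kernel directly on the 3D-by-1D case from the unit tests further down (values taken from that test; it assumes the kernel header above is included, and reshaping the flat result back to {2, 2} is left to the caller):

    #include <vector>
    void sketch_matmul_3d_by_1d() {
        // input1 {2, 2, 3} flattens to (4 x 3); input2 {3} is read as (3 x 1)
        const std::vector<Aidge::DimSize_t> input1Dims{2, 2, 3};
        const std::vector<Aidge::DimSize_t> input2Dims{3};
        const float input1[12] = {0.82786506f, 0.19047028f, 0.62954658f,
                                  0.63160968f, 0.12468684f, 0.49015969f,
                                  0.49215794f, 0.42231840f, 0.02699018f,
                                  0.66403216f, 0.94622904f, 0.42048711f};
        const float input2[3] = {0.82458717f, 0.88598752f, 0.78737932f};
        float output[4];  // rows1 * cols2 = 4 * 1
        Aidge::MatMulImpl_cpu_forward_kernel<float, float>(
            input1Dims, input2Dims, input1, input2, output);
        // output ~= {1.34709311, 1.01722980, 0.80124742, 1.71698236}
    }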
@@ -30,24 +30,12 @@ void Aidge::MatMulImpl_cpu::forward()
     // Find the correct kernel type
     auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
         {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-         std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
          std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});

     // Call kernel
-    // if (mOp.getInput(0)->nbDims() == 4) {
-    //     kernelFunc(
-    //         mOp.getStaticAttributes(),
-    //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->template dims<4>(),
-    //         mOp.getInput(0))->getImpl()->rawPtr(),
-    //         mOp.mInputs[1]->getImpl()->rawPtr(),
-    //         mOp.mInputs[2]->getImpl()->rawPtr(),
-    //         getCPUPtr(mOp.getRawOutput(0));
-    // }
-    // else
     kernelFunc(
-        dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size() / std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawInput(1)),
         getCPUPtr(mOp.getRawOutput(0)));
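forward() no longer flattens input 0 itself: it passes both raw shape vectors and lets the kernel do the folding. The output shape implied by that convention is input1's dims with the last one replaced by input2's trailing dims. A hypothetical helper (not part of the commit) illustrating that rule:

    // Hypothetical illustration only: output dims implied by the kernel's
    // flattening (input1's leading dims, then input2's trailing dims).
    std::vector<Aidge::DimSize_t> matmulOutDims(const std::vector<Aidge::DimSize_t>& in1,
                                                const std::vector<Aidge::DimSize_t>& in2) {
        std::vector<Aidge::DimSize_t> out(in1.begin(), in1.end() - 1);  // drop the contracted dim
        out.insert(out.end(), in2.begin() + 1, in2.end());              // keep trailing dims of in2
        return out;  // e.g. {1,2,3} x {3,4} -> {1,2,4}; {2,2,3} x {3} -> {2,2}
    }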
@@ -20,92 +20,126 @@
 using namespace Aidge;

 TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
-    // Test MatMul forward with batch size = 2 and feature size = 75
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{
-        {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{
-        {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}}});
-    std::shared_ptr<Node> myMatMul = MatMul(75, 5, "mymatmul");
-    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
-    op->associateInput(1, myWeights);
-    SECTION("2D input") {
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{
-            {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
-              19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
-              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
-              57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74},
-             {75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
-              90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
-              105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-              120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
-              135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}});
-        op->associateInput(0, myInput);
-        op->setDataType(DataType::Int32);
SECTION("2D Tensors") {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
{
{0.16672266, 0.39773488},
{0.83746278, 0.54205710}
}
});
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{
{
{0.50658345, 0.04777747},
{0.22279310, 0.41348755}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
{
{0.17307153, 0.17242400},
{0.54501140, 0.26414573}
}
});
std::shared_ptr<Node> myMatMul = MatMul();
auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
op->associateInput(0, input_1);
op->associateInput(1, input_2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
myMatMul->forward();
REQUIRE(*(op->getOutput(0)) == *myOutput);
expectedOutput->print();
op->getOutput(0)->print();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
}
}
SECTION("4D input") {
std::shared_ptr<Tensor> myInput =
std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4},
{5, 6, 7, 8, 9},
{10, 11, 12, 13, 14},
{15, 16, 17, 18, 19},
{20, 21, 22, 23, 24}},
{{25, 26, 27, 28, 29},
{30, 31, 32, 33, 34},
{35, 36, 37, 38, 39},
{40, 41, 42, 43, 44},
{45, 46, 47, 48, 49}},
{{50, 51, 52, 53, 54},
{55, 56, 57, 58, 59},
{60, 61, 62, 63, 64},
{65, 66, 67, 68, 69},
{70, 71, 72, 73, 74}}},
{{{75, 76, 77, 78, 79},
{80, 81, 82, 83, 84},
{85, 86, 87, 88, 89},
{90, 91, 92, 93, 94},
{95, 96, 97, 98, 99}},
{{100, 101, 102, 103, 104},
{105, 106, 107, 108, 109},
{110, 111, 112, 113, 114},
{115, 116, 117, 118, 119},
{120, 121, 122, 123, 124}},
{{125, 126, 127, 128, 129},
{130, 131, 132, 133, 134},
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}}}});
op->associateInput(0, myInput);
op->setDataType(DataType::Int32);
SECTION("3D Tensor by 1D Tensor") {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
{
{{0.82786506, 0.19047028, 0.62954658},
{0.63160968, 0.12468684, 0.49015969}},
{{0.49215794, 0.42231840, 0.02699018},
{0.66403216, 0.94622904, 0.42048711}}
}
});
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{
{0.82458717, 0.88598752, 0.78737932}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
{
{1.34709311, 1.01722980},
{0.80124742, 1.71698236}
}
});
std::shared_ptr<Node> myMatMul = MatMul();
auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
op->associateInput(0, input_1);
op->associateInput(1, input_2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
myMatMul->forward();
REQUIRE(*(op->getOutput(0)) == *myOutput);
expectedOutput->print();
op->getOutput(0)->print();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
}
}
-    // std::cout << static_cast<Tensor>((*myMatMul->getOperator())["weight"])[0][0][0][0] << std::endl;
SECTION("3D Tensor by 2D Tensor") {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,1,2,3> {
{
{
{0.53427607, 0.69181818, 0.30088913},
{0.20866227, 0.67821276, 0.25695610}
}
}
});
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,3,4>{
{
{0.03158629, 0.21031839, 0.95692378, 0.05287921},
{0.66182911, 0.91662365, 0.07928377, 0.86983263},
{0.12386280, 0.63736272, 0.15963674, 0.465079722}
}
});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,1,2,4> {
{
{
{0.51201022, 0.93828046, 0.61414438, 0.76995558},
{0.48727912, 0.82932562, 0.29446477, 0.72047055}
}
}
});
std::shared_ptr<Node> myMatMul = MatMul();
auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
op->associateInput(0, input_1);
op->associateInput(1, input_2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
myMatMul->forward();
expectedOutput->print();
op->getOutput(0)->print();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
}
}
}
\ No newline at end of file
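As a quick sanity check on the "2D Tensors" fixture, the first expected value follows directly from the kernel's inner loop (row 0 of input_1 dotted with column 0 of input_2):

    // Hand check of expectedOutput[0][0] in the "2D Tensors" section:
    float v = 0.16672266f * 0.50658345f   // k = 0
            + 0.39773488f * 0.22279310f;  // k = 1
    // v ~= 0.17307153, within the 1e-5 tolerance used by the REQUIRE loop.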