Commit ca89e12d authored by Houssem ROUIS, committed by Benjamin Halimi

Remove input_nbr attribute for Add operator

parent 87a17ffa
1 merge request: !54 Fix the BatchNorm operator
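The change is mechanical at each call site: the `Add` factory no longer takes an input count, since the operator is now fixed at two inputs. A minimal before/after sketch, assuming the usual `aidge/operator/Add.hpp` header and `using namespace Aidge;` as in the test below (the node name is illustrative, not from this diff):

// Before this commit, the number of inputs was an explicit argument:
//   std::shared_ptr<Node> myAdd = Add(2, "myadd");
// After it, Add is always binary and only the optional name remains:
std::shared_ptr<Node> myAdd = Add("myadd");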
@@ -22,48 +22,27 @@
using namespace Aidge;
TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
{ //
{ //
{{20, 47},{21, 48},{22, 49}}, //
{{23, 50},{24, 51},{25, 52}}, //
{{26, 53},{27, 54},{28, 55}} //
}, //
{ //
{{29, 56},{30, 57},{31, 58}}, //
{{32, 59},{33, 60},{34, 61}}, //
{{35, 62},{36, 63},{37, 64}} //
}, //
{ //
{{38, 65},{39, 66},{40, 67}}, //
{{41, 68},{42, 69},{43, 70}}, //
{{44, 71},{45, 72},{46, 73}} //
} //
} //
}); //
input1->setBackend("cuda");
SECTION("One input") {
std::shared_ptr<Node> myAdd = Add(1);
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input1);
op->setBackend("cuda");
op->setDataType(DataType::Float32);
myAdd->forward();
float* computedOutput = new float[input1->size()]();
cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * input1->size(), cudaMemcpyDeviceToHost);
float* targetOutput = new float[input1->size()]();
cudaMemcpy(targetOutput, input1->getImpl()->rawPtr(), sizeof(float) * input1->size(), cudaMemcpyDeviceToHost);
for(int i = 0; i < input1->size(); i++){
REQUIRE(fabs(computedOutput[i] - targetOutput[i]) < 1e-6);
}
delete[] computedOutput;
delete[] targetOutput;
}
SECTION("Two inputs") {
SECTION("Same input") {
std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
{ //
{ //
{{20, 47},{21, 48},{22, 49}}, //
{{23, 50},{24, 51},{25, 52}}, //
{{26, 53},{27, 54},{28, 55}} //
}, //
{ //
{{29, 56},{30, 57},{31, 58}}, //
{{32, 59},{33, 60},{34, 61}}, //
{{35, 62},{36, 63},{37, 64}} //
}, //
{ //
{{38, 65},{39, 66},{40, 67}}, //
{{41, 68},{42, 69},{43, 70}}, //
{{44, 71},{45, 72},{46, 73}} //
} //
} //
}); //
input1->setBackend("cuda");
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
{
{
@@ -84,7 +63,7 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
}
});
-std::shared_ptr<Node> myAdd = Add(2);
+std::shared_ptr<Node> myAdd = Add();
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input1);
op->associateInput(1, input1);
@@ -103,47 +82,6 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
delete[] computedOutput;
}
SECTION("Three inputs") {
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
{
{
{{ 60, 141},{ 63, 144},{ 66, 147}},
{{ 69, 150},{ 72, 153},{ 75, 156}},
{{ 78, 159},{ 81, 162},{ 84, 165}}
},
{
{{ 87, 168},{ 90, 171},{ 93, 174}},
{{ 96, 177},{ 99, 180},{102, 183}},
{{105, 186},{108, 189},{111, 192}}
},
{
{{114, 195},{117, 198},{120, 201}},
{{123, 204},{126, 207},{129, 210}},
{{132, 213},{135, 216},{138, 219}}
}
}
});
std::shared_ptr<Node> myAdd = Add(3);
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input1);
op->associateInput(1, input1);
op->associateInput(2, input1);
op->setDataType(DataType::Float32);
op->setBackend("cuda");
myAdd->forward();
float* computedOutput = new float[input1->size()]();
cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput->size(), cudaMemcpyDeviceToHost);
for(int i = 0; i < expectedOutput->size(); i++){
const float targetOutput = *(static_cast<float*>(expectedOutput->getImpl()->rawPtr()) + i);
REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
}
delete[] computedOutput;
}
SECTION("Broadcasting") {
std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<float,3,1,3,2> {
{ //
@@ -168,47 +106,80 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
} //
}); //
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
{ //
{ //
{{ 120, 222},{ 124, 226},{ 128, 230}}, //
{{ 126, 228},{ 130, 232},{ 134, 236}}, //
{{ 132, 234},{ 136, 238},{ 140, 242}} //
}, //
{ //
{{ 126, 228},{ 130, 232},{ 134, 236}}, //
{{ 132, 234},{ 136, 238},{ 140, 242}}, //
{{ 138, 240},{ 142, 244},{ 146, 248}} //
}, //
{ //
{{ 132, 234},{ 136, 238},{140, 242}}, //
{{ 138, 240},{ 142, 244},{146, 248}}, //
{{ 144, 246},{ 148, 250},{152, 254}} //
} //
} //
}); //
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}});
input_0->setBackend("cuda");
input_1->setBackend("cuda");
input_2->setBackend("cuda");
std::shared_ptr<Node> myAdd = Add(3);
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input_0);
op->associateInput(1, input_1);
op->associateInput(2, input_2);
op->setDataType(DataType::Float32);
op->setBackend("cuda");
myAdd->forward();
float* computedOutput = new float[input1->size()]();
cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput->size(), cudaMemcpyDeviceToHost);
/// Input0(d0, 1, d2, d3) + Input1(1, d1, d2, d3) = Output(d0, d1, d2, d3)
std::shared_ptr<Tensor> expectedOutput0 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
{ //
{ //
{{ 20, 22},{ 24, 26},{ 28, 30}}, //
{{ 26, 28},{ 30, 32},{ 34, 36}}, //
{{ 32, 34},{ 36, 38},{ 40, 42}} //
}, //
{ //
{{ 26, 28},{ 30, 32},{ 34, 36}}, //
{{ 32, 34},{ 36, 38},{ 40, 42}}, //
{{ 38, 40},{ 42, 44},{ 46, 48}} //
}, //
{ //
{{ 32, 34},{ 36, 38},{40, 42}}, //
{{ 38, 40},{ 42, 44},{46, 48}}, //
{{ 44, 46},{ 48, 50},{52, 54}} //
} //
} //
}); //
for(int i = 0; i < expectedOutput->size(); i++){
const float targetOutput = *(static_cast<float*>(expectedOutput->getImpl()->rawPtr()) + i);
REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
std::shared_ptr<Node> myAdd0 = Add();
auto op0 = std::static_pointer_cast<OperatorTensor>(myAdd0 -> getOperator());
op0->associateInput(0, input_0);
op0->associateInput(1, input_1);
op0->setDataType(DataType::Float32);
op0->setBackend("cuda");
myAdd0->forward();
float* computedOutput0 = new float[expectedOutput0->size()]();
cudaMemcpy(computedOutput0, op0->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput0->size(), cudaMemcpyDeviceToHost);
for(int i = 0; i < expectedOutput0->size(); i++){
const float targetOutput = *(static_cast<float*>(expectedOutput0->getImpl()->rawPtr()) + i);
REQUIRE(fabs(computedOutput0[i] - targetOutput) < 1e-6);
}
delete[] computedOutput;
delete[] computedOutput0;
/// Input0(d0, d1, d2, d3) + Input1(d3) = Output(d0, d1, d2, d3)
std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<float,3,1,3,2> {
{ //
{ //
{{100, 201},{102, 203},{104, 205}} //
}, //
{ //
{{106, 207},{108, 209},{110, 211}} //
}, //
{ //
{{112, 213},{114, 215},{116, 217}} //
} //
} //
}); //
std::shared_ptr<Node> myAdd1 = Add();
auto op1 = std::static_pointer_cast<OperatorTensor>(myAdd1 -> getOperator());
op1->associateInput(0, input_0);
op1->associateInput(1, input_2);
op1->setDataType(DataType::Float32);
op1->setBackend("cuda");
myAdd1->forward();
float* computedOutput1 = new float[expectedOutput1->size()]();
cudaMemcpy(computedOutput1, op1->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput1->size(), cudaMemcpyDeviceToHost);
for(int i = 0; i < expectedOutput1->size(); i++){
const float targetOutput = *(static_cast<float*>(expectedOutput1->getImpl()->rawPtr()) + i);
REQUIRE(fabs(computedOutput1[i] - targetOutput) < 1e-6);
}
delete[] computedOutput1;
}
SECTION("Random Input") {
@@ -231,11 +202,11 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial)
{
// Create Add Operator CUDA
-std::shared_ptr<Node> myAddCUDA = Add(2, "myaddcuda");
+std::shared_ptr<Node> myAddCUDA = Add("myaddcuda");
auto op_cuda = std::static_pointer_cast<OperatorTensor>(myAddCUDA -> getOperator());
// Create Add Operator CPU
-std::shared_ptr<Node> myAddCPU = Add(2, "myaddcpu");
+std::shared_ptr<Node> myAddCPU = Add("myaddcpu");
auto op_cpu = std::static_pointer_cast<OperatorTensor>(myAddCPU -> getOperator());
op_cpu->setDataType(DataType::Float32);
op_cpu->setBackend("cpu");
@@ -360,16 +331,12 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") {
} //
}); //
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}});
input_0->setBackend("cuda");
input_1->setBackend("cuda");
input_2->setBackend("cuda");
-std::shared_ptr<Node> myAdd = Add(3);
+std::shared_ptr<Node> myAdd = Add();
auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
op->associateInput(0, input_0);
op->associateInput(1, input_1);
op->associateInput(2, input_2);
op->setDataType(DataType::Float32);
op->setBackend("cuda");
myAdd->forward();
@@ -420,14 +387,11 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") {
} //
} //
}); //
std::shared_ptr<Tensor> expectedInput3Grad = std::make_shared<Tensor>(Array1D<float,2> {{729, 756}});
float *computedGrad1Cuda = new float[expectedInput1Grad->size()]();
cudaMemcpy(computedGrad1Cuda, op->getInput(0)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput1Grad->size(), cudaMemcpyDeviceToHost);
float *computedGrad2Cuda = new float[expectedInput2Grad->size()]();
cudaMemcpy(computedGrad2Cuda, op->getInput(1)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput2Grad->size(), cudaMemcpyDeviceToHost);
float *computedGrad3Cuda = new float[expectedInput3Grad->size()]();
cudaMemcpy(computedGrad3Cuda, op->getInput(2)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput3Grad->size(), cudaMemcpyDeviceToHost);
for(int i = 0; i < expectedInput1Grad->size(); i++){
const float targetOutput = *(static_cast<float*>(expectedInput1Grad->getImpl()->rawPtr()) + i);
@@ -437,12 +401,7 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") {
const float targetOutput = *(static_cast<float*>(expectedInput2Grad->getImpl()->rawPtr()) + i);
REQUIRE(fabs(computedGrad2Cuda[i] - targetOutput) < 1e-6);
}
for(int i = 0; i < expectedInput3Grad->size(); i++){
const float targetOutput = *(static_cast<float*>(expectedInput3Grad->getImpl()->rawPtr()) + i);
REQUIRE(fabs(computedGrad3Cuda[i] - targetOutput) < 1e-6);
}
delete[] computedGrad1Cuda;
delete[] computedGrad2Cuda;
delete[] computedGrad3Cuda;
}
\ No newline at end of file
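Note that the deleted "Three inputs" section has no direct equivalent: with the attribute gone, Add always takes exactly two inputs. Summing three tensors would instead be expressed by chaining two binary Add nodes, roughly as sketched below (node names are illustrative; the wiring assumes the standard Node::addChild API):

std::shared_ptr<Node> add0 = Add("add0");
std::shared_ptr<Node> add1 = Add("add1");
// Feed the output of the first Add into input 0 of the second;
// the three operands then attach to add0's inputs 0/1 and add1's input 1.
add0->addChild(add1, 0, 0);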