/********************************************************************************
* Copyright (c) 2024 Thales
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
* Author: Lucas RAKOTOARIVONY, Thales Research & Technology France
* Date: 10.09.2024
*
********************************************************************************/
#include <array>
#include <cmath>    // std::fabs
#include <cstddef>  // std::size_t
#include <catch2/catch_test_macros.hpp>
#include "Test_cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
using namespace Aidge;
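
// Forward test: run ILayerNorm on a 4D CUDA tensor with per-channel weight and
// bias, then compare the result against precomputed reference values.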
TEST_CASE("[gpu/operator] ILayerNorm(forward)", "[ILayerNorm][GPU]") {
SECTION("4D Tensor") {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<float,2,2,2,10> {
{
{
{
{0.96, 0.48, 0.54, 0.49, 0.59, 0.93, 0.00, 0.00, 0.61, 0.61},
{0.85, 0.06, 0.11, 0.87, 0.55, 0.12, 0.80, 0.48, 0.41, 0.16}
},
{
{0.24, 0.46, 0.97, 0.19, 0.65, 0.12, 0.44, 1.00, 0.37, 0.09},
{0.44, 0.64, 0.21, 0.58, 0.05, 0.24, 0.56, 0.07, 0.49, 0.79}
}
},
{
{
{0.00, 0.13, 0.55, 0.42, 0.49, 0.28, 0.52, 0.55, 0.34, 0.85},
{0.98, 0.32, 0.09, 0.05, 0.37, 0.47, 0.63, 0.13, 0.70, 0.02}
},
{
{0.69, 0.13, 0.74, 0.61, 0.25, 0.87, 0.46, 0.40, 0.81, 0.06},
{0.89, 0.32, 0.61, 0.24, 0.70, 0.23, 0.09, 0.03, 0.14, 0.80}
}
}
}
});
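// Bias (shift) and weight (scale) vectors applied along the last dimension (size 10)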
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float, 10>{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}});
std::shared_ptr<Tensor> myWeight = std::make_shared<Tensor>(Array1D<float, 10>{{0.1617684f, 0.3833238f, -0.6842308f, -0.4342245f, -0.4717381f, -0.1776187f, -0.2728751f, -0.4638580f, 0.2936697f, -0.9011016f}});
myWeight->setBackend("cuda");
myBias->setBackend("cuda");
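// Create the ILayerNorm node and wire its inputs: 0 = data, 1 = weight, 2 = bias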
std::shared_ptr<Node> myILayerNorm = ILayerNorm();
auto op = std::static_pointer_cast<OperatorTensor>(myILayerNorm->getOperator());
op->associateInput(1, myWeight);
op->associateInput(2, myBias);
input0->setBackend("cuda");
op->associateInput(0, input0);
op->setDataType(DataType::Float32);
op->setBackend("cuda");
op->forward();
// Expected output of the forward pass (reference values)
std::shared_ptr<Tensor> output_ilayernorm = std::make_shared<Tensor>(Array4D<float,2,2,2,10> {
{
{
{
{9.8821178e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02},
{4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00}
},
{
{0.0000000e+00, 4.9410585e-02, 9.8821178e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 9.8821178e-02, 4.9410585e-02, 0.0000000e+00},
{4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02}
}
},
{
{
{0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02},
{9.8821178e-02, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00}
},
{
{4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00},
{4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02}
}
}
}
});
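// Copy the computed output from device memory back to the host for comparison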
float* computedOutput = new float[output_ilayernorm->size()]();
cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * output_ilayernorm->size(), cudaMemcpyDeviceToHost);
// Check that the forward results match the expected values
for (std::size_t i = 0; i < output_ilayernorm->size(); i++) {
const float targetOutput = *(static_cast<float*>(output_ilayernorm->getImpl()->rawPtr()) + i);
REQUIRE(std::fabs(computedOutput[i] - targetOutput) < 1e-6);
}
delete[] computedOutput;
}
}
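
// Backward test: propagate a known output gradient through ILayerNorm and
// compare the resulting input gradient against precomputed reference values.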
TEST_CASE("[gpu/operator] ILayerNorm(backward)", "[ILayerNorm][GPU]")
{
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<float,1,1,1,8> { //NCHW
{
{
{
{1.46650600, 1.24083233, -0.33106008, -0.15137172, 0.06625678, -1.8326609, 0.53444749, -0.05167147},
},
},
}
});
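// Bias (non-zero shift) and weight (identity scale) tensors with the same shape as the input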
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array4D<float,1,1,1,8> { //NCHW
{
{
{
{0.96, 0.54, 0.22, -0.15, 0.17, 0.26, -0.85, 0.5},
},
},
}
});
std::shared_ptr<Tensor> myWeight = std::make_shared<Tensor>(Array4D<float,1,1,1,8> { //NCHW
{
{
{
{1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0},
},
},
}
});
myWeight->setBackend("cuda");
myBias->setBackend("cuda");
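// Same wiring as the forward test: input 0 = data, 1 = weight, 2 = bias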
std::shared_ptr<Node> myILayerNorm = ILayerNorm();
auto op = std::static_pointer_cast<OperatorTensor>(myILayerNorm->getOperator());
op->associateInput(1, myWeight);
op->associateInput(2, myBias);
input0->setBackend("cuda");
op->associateInput(0, input0);
op->setDataType(DataType::Float32);
op->setBackend("cuda");
myILayerNorm->forward();
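// Gradient of the loss with respect to the output (dL/dOutput), fed into the backward pass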
std::shared_ptr<Tensor> myOutputGrad = std::make_shared<Tensor>(Array4D<float,1,1,1,8> {
{
{
{
{ 1.34347093, 0.90813798, 0.39607167, 1.20428133, 0.16845724, 0.48487359, 0.40748054, -0.21790814},
},
},
}
});
myOutputGrad->setBackend("cuda");
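// Attach the output gradient to the forward result and run the backward pass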
std::shared_ptr<Tensor> predictedOutput = op->getOutput(0);
std::shared_ptr<Tensor> input = op->getInput(0);
predictedOutput->setGrad(myOutputGrad);
REQUIRE_NOTHROW(myILayerNorm->backward());
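// Expected gradient with respect to the input (reference values)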
std::shared_ptr<Tensor> expectedInputGradILayerNorm = std::make_shared<Tensor>(Array4D<float,1,1,1,8> {
{
{
{
{ 0.467678, 0.310749, 0.1129, 0.351786, 0.0507252, 0.101587, 0.130249, -0.0646476},
},
},
}
});
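// Copy the computed input gradient from device memory back to the host for comparison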
float* computedInputGradCuda = new float[myOutputGrad->size()]();
cudaMemcpy(computedInputGradCuda, op->getInput(0)->grad()->getImpl()->rawPtr(), sizeof(float) * myOutputGrad->size(), cudaMemcpyDeviceToHost);
// Check that the backward results match the expected input gradient
for (std::size_t i = 0; i < expectedInputGradILayerNorm->size(); i++) {
const float targetOutput = *(static_cast<float*>(expectedInputGradILayerNorm->getImpl()->rawPtr()) + i);
REQUIRE(std::fabs(computedInputGradCuda[i] - targetOutput) < 2e-6);
}
delete[] computedInputGradCuda;
}