/********************************************************************************
* Copyright (c) 2024 Thales
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
* Author: Lucas RAKOTOARIVONY, Thales Research & Technology France
* Date: 10.09.2024
*
********************************************************************************/
#include <array>
#include <cmath>    // std::fabs
#include <cstddef>  // std::size_t
#include <catch2/catch_test_macros.hpp>
#include "Test_cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
using namespace Aidge;
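
// Forward test: run ILayerNorm on a 4D CUDA tensor with per-channel weight and
// bias, then compare the result against precomputed reference values.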
TEST_CASE("[gpu/operator] ILayerNorm(forward)", "[ILayerNorm][GPU]") {
SECTION("4D Tensor") {
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<float,2,2,2,10> {
{
{
{
{0.96, 0.48, 0.54, 0.49, 0.59, 0.93, 0.00, 0.00, 0.61, 0.61},
{0.85, 0.06, 0.11, 0.87, 0.55, 0.12, 0.80, 0.48, 0.41, 0.16}
},
{
{0.24, 0.46, 0.97, 0.19, 0.65, 0.12, 0.44, 1.00, 0.37, 0.09},
{0.44, 0.64, 0.21, 0.58, 0.05, 0.24, 0.56, 0.07, 0.49, 0.79}
}
},
{
{
{0.00, 0.13, 0.55, 0.42, 0.49, 0.28, 0.52, 0.55, 0.34, 0.85},
{0.98, 0.32, 0.09, 0.05, 0.37, 0.47, 0.63, 0.13, 0.70, 0.02}
},
{
{0.69, 0.13, 0.74, 0.61, 0.25, 0.87, 0.46, 0.40, 0.81, 0.06},
{0.89, 0.32, 0.61, 0.24, 0.70, 0.23, 0.09, 0.03, 0.14, 0.80}
}
}
}
});
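// Bias (shift) and weight (scale) vectors applied along the last dimension (size 10)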
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float, 10>{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}});
std::shared_ptr<Tensor> myWeight = std::make_shared<Tensor>(Array1D<float, 10>{{0.1617684f, 0.3833238f, -0.6842308f, -0.4342245f, -0.4717381f, -0.1776187f, -0.2728751f, -0.4638580f, 0.2936697f, -0.9011016f}});
myWeight->setBackend("cuda");
myBias->setBackend("cuda");
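// Create the ILayerNorm node and wire its inputs: 0 = data, 1 = weight, 2 = bias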
std::shared_ptr<Node> myILayerNorm = ILayerNorm();
auto op = std::static_pointer_cast<OperatorTensor>(myILayerNorm->getOperator());
op->associateInput(1, myWeight);
op->associateInput(2, myBias);
input0->setBackend("cuda");
op->associateInput(0, input0);
op->setDataType(DataType::Float32);
op->setBackend("cuda");
op->forward();
// Expected output of the forward pass (reference values)
std::shared_ptr<Tensor> output_ilayernorm = std::make_shared<Tensor>(Array4D<float,2,2,2,10> {
{
{
{
{9.8821178e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02},
{4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00}
},
{
{0.0000000e+00, 4.9410585e-02, 9.8821178e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 9.8821178e-02, 4.9410585e-02, 0.0000000e+00},
{4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02}
}
},
{
{
{0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02},
{9.8821178e-02, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00}
},
{
{4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00},
{4.9410585e-02, 4.9410585e-02, 4.9410585e-02, 0.0000000e+00, 4.9410585e-02, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.9410585e-02}
}
}
}
});
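// Copy the computed output from device memory back to the host for comparison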
float* computedOutput = new float[output_ilayernorm->size()]();
cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * output_ilayernorm->size(), cudaMemcpyDeviceToHost);
// Check that the forward results match the expected values
for (std::size_t i = 0; i < output_ilayernorm->size(); i++) {
const float targetOutput = *(static_cast<float*>(output_ilayernorm->getImpl()->rawPtr()) + i);
REQUIRE(std::fabs(computedOutput[i] - targetOutput) < 1e-6);
}
delete[] computedOutput;
}
}
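
// Backward test: propagate a known output gradient through ILayerNorm and
// compare the resulting input gradient against precomputed reference values.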
TEST_CASE("[gpu/operator] ILayerNorm(backward)", "[ILayerNorm][GPU]")
{
std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<float,1,1,1,8> { //NCHW
{
{
{
{1.46650600, 1.24083233, -0.33106008, -0.15137172, 0.06625678, -1.8326609, 0.53444749, -0.05167147},
},
},
}
});
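// Bias (non-zero shift) and weight (identity scale) tensors with the same shape as the input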
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array4D<float,1,1,1,8> { //NCHW
{
{
{
{0.96, 0.54, 0.22, -0.15, 0.17, 0.26, -0.85, 0.5},
},
},
}
});
std::shared_ptr<Tensor> myWeight = std::make_shared<Tensor>(Array4D<float,1,1,1,8> { //NCHW
{
{
{
{1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0},
},
},
}
});
myWeight->setBackend("cuda");
myBias->setBackend("cuda");
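// Same wiring as the forward test: input 0 = data, 1 = weight, 2 = bias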
std::shared_ptr<Node> myILayerNorm = ILayerNorm();
auto op = std::static_pointer_cast<OperatorTensor>(myILayerNorm->getOperator());
op->associateInput(1, myWeight);
op->associateInput(2, myBias);
input0->setBackend("cuda");
op->associateInput(0, input0);
op->setDataType(DataType::Float32);
op->setBackend("cuda");
myILayerNorm->forward();
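// Gradient of the loss with respect to the output (dL/dOutput), fed into the backward pass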
std::shared_ptr<Tensor> myOutputGrad = std::make_shared<Tensor>(Array4D<float,1,1,1,8> {
{
{
{
{ 1.34347093, 0.90813798, 0.39607167, 1.20428133, 0.16845724, 0.48487359, 0.40748054, -0.21790814},
},
},
}
});
myOutputGrad->setBackend("cuda");
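// Attach the output gradient to the forward result and run the backward pass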
std::shared_ptr<Tensor> predictedOutput = op->getOutput(0);
std::shared_ptr<Tensor> input = op->getInput(0);
predictedOutput->setGrad(myOutputGrad);
REQUIRE_NOTHROW(myILayerNorm->backward());
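// Expected gradient with respect to the input (reference values)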
std::shared_ptr<Tensor> expectedInputGradILayerNorm = std::make_shared<Tensor>(Array4D<float,1,1,1,8> {
{
{
{
{ 0.467678, 0.310749, 0.1129, 0.351786, 0.0507252, 0.101587, 0.130249, -0.0646476},
},
},
}
});
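// Copy the computed input gradient from device memory back to the host for comparison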
float* computedInputGradCuda = new float[myOutputGrad->size()]();
cudaMemcpy(computedInputGradCuda, op->getInput(0)->grad()->getImpl()->rawPtr(), sizeof(float) * myOutputGrad->size(), cudaMemcpyDeviceToHost);
// Check that the backward results match the expected input gradient
for (std::size_t i = 0; i < expectedInputGradILayerNorm->size(); i++) {
const float targetOutput = *(static_cast<float*>(expectedInputGradILayerNorm->getImpl()->rawPtr()) + i);
REQUIRE(std::fabs(computedInputGradCuda[i] - targetOutput) < 2e-6);
}
delete[] computedInputGradCuda;
}