// Test_TensorImpl.cpp
// Author: Olivier BICHLER
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <array>
#include <cmath>
#include <vector>

#include <catch2/catch_test_macros.hpp>

#include "Test_cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
using namespace Aidge;
// Smoke test for the CUDA runtime: copies two host vectors to the device,
// runs the project-provided vector_add kernel wrapper (declared in
// Test_cuda.hpp), copies the result back and compares against the host sum.
TEST_CASE("CUDA test") {
    // Element count for the vector-add sanity check.
    constexpr int N = 100;

    // Host buffers. std::vector releases its storage even when a REQUIRE
    // fails (Catch2 unwinds out of the test), unlike the raw new[]/delete[]
    // pairs this replaces, which leaked on any assertion failure.
    std::vector<float> a(N, 1.0f);
    std::vector<float> b(N, 2.0f);
    std::vector<float> out(N, 0.0f);

    // Allocate device memory, checking every CUDA call: an unchecked failed
    // allocation would otherwise surface later as a confusing copy error.
    float *d_a = nullptr, *d_b = nullptr, *d_out = nullptr;
    REQUIRE(cudaMalloc(reinterpret_cast<void**>(&d_a), sizeof(float) * N) == cudaSuccess);
    REQUIRE(cudaMalloc(reinterpret_cast<void**>(&d_b), sizeof(float) * N) == cudaSuccess);
    REQUIRE(cudaMalloc(reinterpret_cast<void**>(&d_out), sizeof(float) * N) == cudaSuccess);

    // Transfer data from host to device memory (cudaMemcpy blocks, so no
    // explicit synchronization is needed here).
    REQUIRE(cudaMemcpy(d_a, a.data(), sizeof(float) * N, cudaMemcpyHostToDevice) == cudaSuccess);
    REQUIRE(cudaMemcpy(d_b, b.data(), sizeof(float) * N, cudaMemcpyHostToDevice) == cudaSuccess);

    // Executing kernel. Kernel launches do not return an error directly:
    // launch-configuration problems must be fetched via cudaGetLastError().
    vector_add(d_out, d_a, d_b, N);
    REQUIRE(cudaGetLastError() == cudaSuccess);

    // Transfer data back to host memory. This blocking copy also
    // synchronizes with the kernel before we read the results.
    REQUIRE(cudaMemcpy(out.data(), d_out, sizeof(float) * N, cudaMemcpyDeviceToHost) == cudaSuccess);

    // Verification against the host reference, with a float tolerance
    // (float literal 1e-6f avoids a silent promotion to double).
    for (int i = 0; i < N; i++) {
        REQUIRE(std::fabs(out[i] - a[i] - b[i]) < 1e-6f);
    }

    // Deallocate device memory (host vectors clean themselves up).
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_out);
}
// Checks that a Tensor built from a const Array3D reports the expected
// shape and holds the expected values on the CUDA device, regardless of
// whether the backend is selected before or after the data is assigned.
TEST_CASE("Tensor creation", "[Connector]") {
    SECTION("from const array") {
        // Select the CUDA backend first, then assign the constant data.
        Tensor x;
        x.setBackend("cuda");
        x = Array3D<int, 2, 2, 2>{
            {{{1, 2},
              {3, 4}},
             {{5, 6},
              {7, 8}}}};

        // The tensor must describe a 2x2x2 layout with 8 elements.
        REQUIRE(x.nbDims() == 3);
        for (std::size_t d = 0; d < 3; ++d) {
            REQUIRE(x.dims()[d] == 2);
        }
        REQUIRE(x.size() == 8);

        // Copy the device buffer back and spot-check first/last entries.
        std::array<int, 8> host;
        cudaMemcpy(host.data(), x.getImpl()->rawPtr(), 8 * sizeof(int), cudaMemcpyDeviceToHost);
        REQUIRE(host[0] == 1);
        REQUIRE(host[7] == 8);
    }

    SECTION("from const array before backend") {
        // Assign the constant data first, then move it to the CUDA backend.
        Tensor x = Array3D<int, 2, 2, 2>{
            {{{1, 2},
              {3, 4}},
             {{5, 6},
              {7, 8}}}};
        x.setBackend("cuda");

        // Shape must be preserved across the backend switch.
        REQUIRE(x.nbDims() == 3);
        for (std::size_t d = 0; d < 3; ++d) {
            REQUIRE(x.dims()[d] == 2);
        }
        REQUIRE(x.size() == 8);

        // Copy the device buffer back and spot-check first/last entries.
        std::array<int, 8> host;
        cudaMemcpy(host.data(), x.getImpl()->rawPtr(), 8 * sizeof(int), cudaMemcpyDeviceToHost);
        REQUIRE(host[0] == 1);
        REQUIRE(host[7] == 8);
    }
}