/********************************************************************************
 * Copyright (c) 2023 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#include <array>
#include <cmath>

#include <catch2/catch_test_macros.hpp>

#include "Test_cuda.hpp"

#include "aidge/data/Tensor.hpp"

#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"

using namespace Aidge;

TEST_CASE("CUDA test") {
    const int N = 100;

    // Allocate host memory
    float* a = new float[N]();
    float* b = new float[N]();
    float* out = new float[N]();

    // Initialize host arrays
    for (int i = 0; i < N; i++) {
        a[i] = 1.0f;
        b[i] = 2.0f;
    }

    // Allocate device memory
    float *d_a, *d_b, *d_out;
    cudaMalloc(reinterpret_cast<void**>(&d_a), sizeof(float) * N);
    cudaMalloc(reinterpret_cast<void**>(&d_b), sizeof(float) * N);
    cudaMalloc(reinterpret_cast<void**>(&d_out), sizeof(float) * N);

    // Transfer data from host to device memory
    cudaMemcpy(d_a, a, sizeof(float) * N, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b, sizeof(float) * N, cudaMemcpyHostToDevice);

    // Execute the kernel
    vector_add(d_out, d_a, d_b, N);

    // Transfer data back to host memory
    cudaMemcpy(out, d_out, sizeof(float) * N, cudaMemcpyDeviceToHost);

    // Verification
    for (int i = 0; i < N; i++) {
        REQUIRE(std::fabs(out[i] - a[i] - b[i]) < 1e-6);
    }

    // Deallocate device memory
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_out);

    // Deallocate host memory
    delete[] a;
    delete[] b;
    delete[] out;
}

TEST_CASE("Tensor creation", "[Connector]") {
    SECTION("from const array") {
        Tensor x;
        x.setBackend("cuda");
        x = Array3D<int, 2, 2, 2>{
            {
                {
                    {1, 2},
                    {3, 4}
                },
                {
                    {5, 6},
                    {7, 8}
                }
            }};

        REQUIRE(x.nbDims() == 3);
        REQUIRE(x.dims()[0] == 2);
        REQUIRE(x.dims()[1] == 2);
        REQUIRE(x.dims()[2] == 2);
        REQUIRE(x.size() == 8);

        // Copy the device buffer back to the host to check its contents
        std::array<int, 8> val;
        cudaMemcpy(&val[0], x.getImpl()->rawPtr(), 8 * sizeof(int), cudaMemcpyDeviceToHost);
        REQUIRE(val[0] == 1);
        REQUIRE(val[7] == 8);
    }

    SECTION("from const array before backend") {
        Tensor x = Array3D<int, 2, 2, 2>{
            {
                {
                    {1, 2},
                    {3, 4}
                },
                {
                    {5, 6},
                    {7, 8}
                }
            }};
        x.setBackend("cuda");

        REQUIRE(x.nbDims() == 3);
        REQUIRE(x.dims()[0] == 2);
        REQUIRE(x.dims()[1] == 2);
        REQUIRE(x.dims()[2] == 2);
        REQUIRE(x.size() == 8);

        // Copy the device buffer back to the host to check its contents
        std::array<int, 8> val;
        cudaMemcpy(&val[0], x.getImpl()->rawPtr(), 8 * sizeof(int), cudaMemcpyDeviceToHost);
        REQUIRE(val[0] == 1);
        REQUIRE(val[7] == 8);
    }
}
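
// -----------------------------------------------------------------------------
// Illustrative sketch only (not part of the test suite): the vector_add helper
// called in TEST_CASE("CUDA test") is presumably declared in Test_cuda.hpp and
// defined in a separate .cu file; its actual implementation may differ. A
// minimal element-wise addition kernel and launcher could look like this:
//
//     __global__ void vector_add_kernel(float* out, const float* a,
//                                       const float* b, int n) {
//         int i = blockIdx.x * blockDim.x + threadIdx.x;  // one thread per element
//         if (i < n) {
//             out[i] = a[i] + b[i];
//         }
//     }
//
//     void vector_add(float* out, const float* a, const float* b, int n) {
//         const int threadsPerBlock = 256;
//         const int blocks = (n + threadsPerBlock - 1) / threadsPerBlock;
//         vector_add_kernel<<<blocks, threadsPerBlock>>>(out, a, b, n);
//         cudaDeviceSynchronize();  // ensure the kernel finished before the
//                                   // test copies results back to the host
//     }
// -----------------------------------------------------------------------------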