Skip to content
Snippets Groups Projects
Commit 0ce89002 authored by Maxence Naud's avatar Maxence Naud
Browse files

Merge branch 'dev' into 'main'

UPD: version 0.2.2 -> 0.2.3

See merge request !33
parents 86852f74 c19db949
Branches main
Tags v0.2.3
No related merge requests found
Showing
with 223 additions and 78 deletions
# C++ Build
build*/
install*/
include/aidge/learning_version.h
# VSCode
.vscode
......@@ -10,7 +10,6 @@ install*/
__pycache__
*.pyc
*.egg-info
aidge_learning/_version.py
wheelhouse/*
# Mermaid
......
# Version 0.2.3 (January 31, 2025)
# Version 0.2.2 (December 12, 2024)
# Version 0.1.1 (May 14, 2024)
......
cmake_minimum_required(VERSION 3.18)
set(CXX_STANDARD 14)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)

# Parse version.txt to retrieve Major, Minor and Patch.
# Three separate capture groups are required so that CMAKE_MATCH_1/2/3
# hold the major, minor and patch components respectively: a single group
# around the whole version string would leave CMAKE_MATCH_2 and
# CMAKE_MATCH_3 empty, and PROJECT_VERSION_MINOR/PATCH would be unset.
string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ ${version})
set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})

# Retrieve latest git commit (short hash); left empty when git or the
# repository metadata is unavailable (ERROR_QUIET swallows the failure).
execute_process(
    COMMAND git rev-parse --short HEAD
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_COMMIT_HASH
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
)
project(aidge_learning
VERSION ${version}
DESCRIPTION "Functions and algorithms to train models in the AIDGE framework"
......@@ -10,6 +28,7 @@ project(aidge_learning
message(STATUS "Project name: ${CMAKE_PROJECT_NAME}")
message(STATUS "Project version: ${version}")
message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
# Note : project name is {project} and python module name is also {project}
set(module_name _${CMAKE_PROJECT_NAME}) # target name
......@@ -92,6 +111,13 @@ if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
append_coverage_compiler_flags()
endif()
message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/learning_version.h")
# Generate version.h file from config file version.h.in
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/version.h.in"
"${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/learning_version.h"
)
##############################################
# Installation instructions
include(GNUInstallDirs)
......
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
find_dependency(aidge_core)
include(${CMAKE_CURRENT_LIST_DIR}/aidge_learning-config-version.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/aidge_learning-targets.cmake)
......@@ -100,7 +100,7 @@ public:
* @note Else, the learning rate is updated using the provided function.
*/
constexpr void update() {
    // Pre-increment: the step counter advances first, so the warm-up ramp
    // produces step * mInitialWarmUp starting at step 1 (never a zero LR).
    // (The diff residue duplicating this assignment with the old
    // post-increment form has been removed; only the merged version stays.)
    mLR = (++mStep < mSwitchStep) ?
        static_cast<float>(mStep) * mInitialWarmUp :
        mStepFunc(mLR, mStep);
};
......
#ifndef AIDGE_UTILS_SYS_INFO_LEARNING_VERSION_INFO_H
#define AIDGE_UTILS_SYS_INFO_LEARNING_VERSION_INFO_H

#include "aidge/utils/Log.hpp"
#include "aidge/learning_version.h"

namespace Aidge {

/// @return the aidge_learning project version string ("major.minor.patch",
///         configured into learning_version.h at build time).
constexpr inline const char * getLearningProjectVersion(){
    return PROJECT_VERSION;
}

/// @return the short git commit hash the library was built from
///         (may be empty when git was unavailable at configure time).
constexpr inline const char * getLearningGitHash(){
    return PROJECT_GIT_HASH;
}

/// Logs the aidge_learning version, git hash, build date/time and the
/// compiler used. Marked `inline` because it is *defined* in a header:
/// without it, every translation unit including this file would emit its
/// own definition and the link would fail with multiple-definition errors.
inline void showLearningVersion() {
    Log::info("Aidge Learning: {} ({}), {} {}", getLearningProjectVersion(), getLearningGitHash(), __DATE__, __TIME__);
    // Compiler version
#if defined(__clang__)
    /* Clang/LLVM. ---------------------------------------------- */
    Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    /* Intel ICC/ICPC. ------------------------------------------ */
    Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER);
#elif defined(__GNUC__) || defined(__GNUG__)
    /* GNU GCC/G++. --------------------------------------------- */
    Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
#elif defined(_MSC_VER)
    /* Microsoft Visual Studio. --------------------------------- */
    Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER);
#else
    Log::info("Unknown compiler\n");
#endif
}
} // namespace Aidge
#endif // AIDGE_UTILS_SYS_INFO_LEARNING_VERSION_INFO_H
// Template processed by CMake's configure_file(): each @...@ placeholder
// is substituted at configure time with the variables parsed from
// version.txt and the git commit hash (see CMakeLists.txt).
#ifndef VERSION_H
#define VERSION_H
namespace Aidge {
// Numeric version components (major.minor.patch).
static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@;
static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@;
static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@;
// Full version string, e.g. "0.2.3".
static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@";
// Short git commit hash captured at configure time (may be empty).
static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@";
}
#endif // VERSION_H
......@@ -2,21 +2,44 @@
name = "aidge_learning"
description="Functions and algorithms to train models in the AIDGE framework"
dependencies = []
requires-python = ">= 3.7"
requires-python = ">= 3.8"
readme = "README.md"
license = { file = "LICENSE" }
classifiers = [
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Programming Language :: Python :: 3"
]
dynamic = ["version"] # defined in tool.setuptools_scm
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)",
"Programming Language :: C++",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3 :: Only",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development"
]
dynamic = ["version"] # defined in pbr
[project.urls]
Homepage = "https://www.deepgreen.ai/en/platform"
Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/"
Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_learning"
Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_learning/-/issues/"
Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_learning/-/releases"
[build-system]
requires = [
"setuptools>=64",
"setuptools_scm[toml]==7.1.0",
"cmake>=3.15.3.post1",
"toml"
"toml",
"pbr"
]
build-backend = "setuptools.build_meta"
......@@ -28,9 +51,6 @@ where = ["."] # list of folders that contain the packages (["."] by default)
include = ["aidge_learning*"] # package names should match these glob patterns (["*"] by default)
exclude = ["aidge_learning.unit_tests*"] # exclude packages matching these glob patterns (empty by default)
namespaces = false # to disable scanning PEP 420 namespaces (true by default)
# SETUPTOOLS_SCM
[tool.setuptools_scm]
write_to = "aidge_learning/_version.py"
#####################################################
# CIBUILDWHEEL
......@@ -100,7 +120,7 @@ persistent = true
# Minimum Python version to use for version dependent checks. Will default to the
# version used to run pylint.
py-version = "3.7"
py-version = "3.8"
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
......
......@@ -22,6 +22,7 @@ void init_SGD(py::module&);
void init_Adam(py::module&);
void init_LRScheduler(py::module&);
void init_Accuracy(py::module&);
void init_LearningSysInfo(py::module&);
void init_Aidge(py::module& m) {
init_Loss(m);
......@@ -29,7 +30,7 @@ void init_Aidge(py::module& m) {
init_SGD(m);
init_Adam(m);
init_Accuracy(m);
init_LearningSysInfo(m);
init_LRScheduler(m);
}
......
#include <pybind11/pybind11.h>
#include "aidge/utils/sys_info/LearningVersionInfo.hpp"
namespace py = pybind11;
namespace Aidge {
// Registers the version/system-information helpers on the Python module:
//   show_version()        -> logs version, git hash, build date and compiler
//   get_project_version() -> "major.minor.patch" version string
//   get_git_hash()        -> short git commit hash of the build
void init_LearningSysInfo(py::module& m){
    m.def("show_version", &showLearningVersion);
    m.def("get_project_version", &getLearningProjectVersion);
    m.def("get_git_hash", &getLearningGitHash);
}
}
# pbr file
[metadata]
version = file: version.txt
......@@ -26,9 +26,15 @@ class CMakeExtension(Extension):
class CMakeBuild(build_ext):
def run(self):
# Impose to use the executable of the python
# used to launch setup.py to setup PythonInterp
python_executable = sys.executable
print(f"python executable : {python_executable}")
# This lists the number of processors available on the machine
# The compilation will use half of them
max_jobs = str(ceil(multiprocessing.cpu_count() / 2))
max_jobs = os.environ.get("AIDGE_NB_PROC", max_jobs)
cwd = pathlib.Path().absolute()
......@@ -40,18 +46,8 @@ class CMakeBuild(build_ext):
if not build_lib.exists():
build_lib.mkdir(parents=True, exist_ok=True)
os.chdir(str(build_temp))
# Impose to use the executable of the python
# used to launch setup.py to setup PythonInterp
python_executable = sys.executable
print(f"python executable : {python_executable}")
compile_type = (
"Release"
if "AIDGE_PYTHON_BUILD_TYPE" not in os.environ
else os.environ["AIDGE_PYTHON_BUILD_TYPE"]
)
# package_prefix = build_lib if not self.editable_mode else SETUP_DIR
# pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute()
install_path = (
os.path.join(sys.prefix, "lib", "libAidge")
......@@ -59,31 +55,52 @@ class CMakeBuild(build_ext):
else os.environ["AIDGE_INSTALL"]
)
build_gen = (
["-G", os.environ["AIDGE_BUILD_GEN"]]
if "AIDGE_BUILD_GEN" in os.environ
# Read environment variables for CMake options
c_compiler = os.environ.get("AIDGE_C_COMPILER", "gcc")
cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++")
build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release")
asan = os.environ.get("AIDGE_ASAN", "OFF")
with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF")
cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "")
build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
build_gen_opts = (
["-G", build_gen]
if build_gen
else []
)
self.spawn(
[
"cmake",
*build_gen,
str(cwd),
"-DTEST=OFF",
f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
f"-DCMAKE_BUILD_TYPE={compile_type}",
"-DPYBIND=ON",
"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
"-DCOVERAGE=OFF",
]
)
test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF")
os.chdir(str(build_temp))
cmake_cmd = [
"cmake",
*build_gen_opts,
str(cwd),
f"-DTEST={test_onoff}",
f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
f"-DCMAKE_BUILD_TYPE={build_type}",
f"-DCMAKE_C_COMPILER={c_compiler}",
f"-DCMAKE_CXX_COMPILER={cxx_compiler}",
f"-DENABLE_ASAN={asan}",
f"-DCUDA={with_cuda}",
"-DPYBIND=ON",
# f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
"-DCMAKE_EXPORT_COMPILE_COMMANDS=1",
"-DCOVERAGE=OFF",
]
# Append architecture-specific arguments if provided
if cmake_arch:
cmake_cmd.append(cmake_arch)
self.spawn(cmake_cmd)
if not self.dry_run:
self.spawn(
["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs]
["cmake", "--build", ".", "--config", build_type, "-j", max_jobs]
)
self.spawn(["cmake", "--install", ".", "--config", compile_type])
self.spawn(["cmake", "--install", ".", "--config", build_type])
os.chdir(str(cwd))
aidge_package = build_lib / (get_project_name())
......
# Catch2 configuration
set(CATCH2_MIN_VERSION 3.3.0)

# Prefer a system-installed Catch2; only fall back to fetching from git when
# none is found. The fetched tag is pinned to a release (not a moving branch
# such as 'devel') so that test builds stay reproducible across configures.
find_package(Catch2 ${CATCH2_MIN_VERSION} QUIET)
if(NOT Catch2_FOUND)
    message(STATUS "Catch2 not found in system, retrieving from git")
    include(FetchContent)
    FetchContent_Declare(
        Catch2
        GIT_REPOSITORY https://github.com/catchorg/Catch2.git
        GIT_TAG v3.7.1 # pinned release, satisfies CATCH2_MIN_VERSION
    )
    FetchContent_MakeAvailable(Catch2)
    message(STATUS "Fetched Catch2 version ${Catch2_VERSION}")
else()
    message(STATUS "Using system Catch2 version ${Catch2_VERSION}")
endif()

# Gather all source files for the test executable
file(GLOB_RECURSE src_files "*.cpp")
......
......@@ -73,9 +73,12 @@ TEST_CASE("[learning/LR] Construction & evolution", "[LRScheduler]") {
// profiling
std::vector<float> profile = myLR.lr_profiling(nbSteps);
// Validate profiling results against ground truth
REQUIRE(truth == profile);
// learning rate computation
std::size_t step = 0;
for (; (step < nbSteps) && (truth[step] == profile[step]) && (truth[step] == myLR.learningRate()); ++step) {
for (; (step < nbSteps) && (truth[step] == myLR.learningRate()); ++step) {
myLR.update();
}
......
......@@ -69,7 +69,7 @@ TEST_CASE("[loss/classification] BCE", "[loss][classification][BCE]") {
for (std::size_t i = 0; i < nb_elements; ++i) {
tmp_res_manual[i] = - ((targ[i] + eps1) * std::log(pred[i] + eps1) + (1.0f - targ[i] + eps2) * std::log(1.0f - pred[i] + eps2));
}
std::cout << "Output manual:" << std::endl;
fmt::println("Output manual:");
std::shared_ptr<Tensor> tmp_tensor = std::make_shared<Tensor>(dims);
tmp_tensor->setBackend("cpu");
tmp_tensor->getImpl()->setRawPtr(tmp_res_manual.get(), nb_elements);
......@@ -77,13 +77,13 @@ TEST_CASE("[loss/classification] BCE", "[loss][classification][BCE]") {
const float res_manual = std::accumulate(&tmp_res_manual[0], &tmp_res_manual[nb_elements], 0.0f, std::plus<float>()) / static_cast<float>(nb_elements);
// compute the BCE using Aidge::loss::BCE function
std::cout << "Input 0 manual:" << std::endl;
fmt::println("Input 0 manual:");
std::shared_ptr<Tensor> pred_tensor = std::make_shared<Tensor>(dims);
pred_tensor->setBackend("cpu");
pred_tensor->getImpl()->setRawPtr(pred.get(), nb_elements);
pred_tensor->print();
std::cout << "Input 1 manual:" << std::endl;
fmt::println("Input 1 manual:");
std::shared_ptr<Tensor> targ_tensor = std::make_shared<Tensor>(dims);
targ_tensor->setBackend("cpu");
targ_tensor->getImpl()->setRawPtr(targ.get(), nb_elements);
......@@ -112,7 +112,7 @@ TEST_CASE("[loss/classification] BCE", "[loss][classification][BCE]") {
pred[i] = valueDist(gen);
}
float * d_pred;
cudaMalloc(&d_pred, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_pred), nb_elements * sizeof(float));
cudaMemcpy(d_pred, pred.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
// create random targets
......@@ -121,7 +121,7 @@ TEST_CASE("[loss/classification] BCE", "[loss][classification][BCE]") {
targ[i] = valueDist(gen);
}
float * d_targ;
cudaMalloc(&d_targ, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_targ), nb_elements * sizeof(float));
cudaMemcpy(d_targ, targ.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
// compute the BCE manually
......@@ -132,7 +132,7 @@ TEST_CASE("[loss/classification] BCE", "[loss][classification][BCE]") {
tmp_res_manual[i] = - ((targ[i] + eps1) * std::log(pred[i] + eps1) + (1.0f - targ[i] + eps2) * std::log(1.0f - pred[i] + eps2));
}
float * d_tmp_res_manual;
cudaMalloc(&d_tmp_res_manual, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_tmp_res_manual), nb_elements * sizeof(float));
cudaMemcpy(d_tmp_res_manual, tmp_res_manual.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
std::shared_ptr<Tensor> tmp_tensor = std::make_shared<Tensor>(dims);
......
......@@ -71,7 +71,7 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
for (std::size_t i = 0; i < nb_elements; ++i) {
tmp_res_manual[i] = std::pow(pred[i] - targ[i],2);
}
std::cout << "Pow output manual:" << std::endl;
fmt::println("Pow output manual:");
std::shared_ptr<Tensor> tmp_tensor = std::make_shared<Tensor>(dims);
tmp_tensor->setBackend("cpu");
tmp_tensor->getImpl()->setRawPtr(tmp_res_manual.get(), nb_elements);
......@@ -79,13 +79,13 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
const float res_manual = std::accumulate(&tmp_res_manual[0], &tmp_res_manual[nb_elements], 0.0f, std::plus<float>()) / static_cast<float>(nb_elements);
// compute the MSE using Aidge::loss::MSE function
std::cout << "Sub input 0 manual:" << std::endl;
fmt::println("Sub input 0 manual:");
std::shared_ptr<Tensor> pred_tensor = std::make_shared<Tensor>(dims);
pred_tensor->setBackend("cpu");
pred_tensor->getImpl()->setRawPtr(pred.get(), nb_elements);
pred_tensor->print();
std::cout << "Sub input 1 manual:" << std::endl;
fmt::println("Sub input 1 manual:");
std::shared_ptr<Tensor> targ_tensor = std::make_shared<Tensor>(dims);
targ_tensor->setBackend("cpu");
targ_tensor->getImpl()->setRawPtr(targ.get(), nb_elements);
......@@ -112,7 +112,7 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
pred[i] = valueDist(gen);
}
float * d_pred;
cudaMalloc(&d_pred, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_pred), nb_elements * sizeof(float));
cudaMemcpy(d_pred, pred.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
// create random targets
......@@ -121,7 +121,7 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
targ[i] = valueDist(gen);
}
float * d_targ;
cudaMalloc(&d_targ, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_targ), nb_elements * sizeof(float));
cudaMemcpy(d_targ, targ.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
// compute the MSE manually
......@@ -130,7 +130,7 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
tmp_res_manual[i] = std::pow(pred[i] - targ[i],2);
}
float * d_tmp_res_manual;
cudaMalloc(&d_tmp_res_manual, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_tmp_res_manual), nb_elements * sizeof(float));
cudaMemcpy(d_tmp_res_manual, tmp_res_manual.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
std::shared_ptr<Tensor> tmp_tensor = std::make_shared<Tensor>(dims);
......
......@@ -122,7 +122,7 @@ TEST_CASE("[metrics] Accuracy", "[metrics][Accuracy]") {
pred[i] = valueDist(gen);
}
float * d_pred;
cudaMalloc(&d_pred, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_pred), nb_elements * sizeof(float));
cudaMemcpy(d_pred, pred.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
// create random targets
......@@ -131,7 +131,7 @@ TEST_CASE("[metrics] Accuracy", "[metrics][Accuracy]") {
targ[i] = valueDist(gen);
}
float * d_targ;
cudaMalloc(&d_targ, nb_elements * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_targ), nb_elements * sizeof(float));
cudaMemcpy(d_targ, targ.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
......
......@@ -197,10 +197,10 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
}
// Allocate device memory
cudaMalloc(&d_val_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_grad_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_momentum1_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_momentum1_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_val_tensors[i]), size_tensors[i] * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_val_grad_tensors[i]), size_tensors[i] * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_val_momentum1_tensors[i]), size_tensors[i] * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_val_momentum1_tensors[i]), size_tensors[i] * sizeof(float));
// Copy data to device
cudaMemcpy(d_val_tensors[i], val_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
......
......@@ -205,9 +205,9 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
}
// Allocate device memory
cudaMalloc(&d_val_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_grad_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_momentum_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_val_tensors[i]), size_tensors[i] * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_val_grad_tensors[i]), size_tensors[i] * sizeof(float));
cudaMalloc(reinterpret_cast<void **>(&d_val_momentum_tensors[i]), size_tensors[i] * sizeof(float));
// Copy data to device
cudaMemcpy(d_val_tensors[i], val_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
......
0.2.2
0.2.3
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment