Skip to content

Commit d0c2b42

Browse files
author
Avikant Wadhwa
committed
Device Vector added and Integrated with Neuro Vertices
1 parent 5a73bb7 commit d0c2b42

File tree

10 files changed

+693
-376
lines changed

10 files changed

+693
-376
lines changed

CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,12 @@ endif()
462462
target_compile_options(gtest PRIVATE -Wno-error=maybe-uninitialized)
463463
target_compile_options(gtest_main PRIVATE -Wno-error=maybe-uninitialized)
464464

465+
if(ENABLE_CUDA)
466+
set(cuda_TestSources
467+
Testing/UnitTesting/DeviceVectorTests.cpp)
468+
set_source_files_properties(${cuda_TestSources} PROPERTIES LANGUAGE CUDA)
469+
endif()
470+
465471
add_executable(tests
466472
Testing/RunTests.cpp
467473
Testing/UnitTesting/OperationManagerTests.cpp
@@ -481,7 +487,8 @@ add_executable(tests
481487
Testing/Utils/CircularBufferTests.cpp
482488
Testing/UnitTesting/EventBufferTests.cpp
483489
Testing/UnitTesting/XmlRecorderTests.cpp
484-
Testing/UnitTesting/Hdf5RecorderTests.cpp)
490+
Testing/UnitTesting/Hdf5RecorderTests.cpp
491+
Testing/UnitTesting/DeviceVectorTests.cpp)
485492

486493
# Links the Googletest framework with the testing executable
487494
target_link_libraries(tests gtest gtest_main)

Simulator/Utils/DeviceVector.h

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
/**
 * @file DeviceVector.h
 *
 * @ingroup Simulator/Utils
 *
 * @brief A vector class that manages both host and device (GPU) memory
 *
 * This class provides a std::vector-like interface for data that can be transferred
 * between host and device memory. It manages the allocation and deallocation of
 * GPU memory and provides synchronization methods between host and device.
 *
 * The GPU-specific members are only compiled when the header is processed by
 * nvcc (__CUDACC__); in host-only translation units the class degrades to a
 * thin wrapper around std::vector with a null device pointer.
 */

#pragma once
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <vector>

#if defined(__CUDACC__)
   #include "BGTypes.h"
   #include "Book.h"
#endif

template <typename T> class DeviceVector {
public:
   /// Mutable element access type. std::vector<bool> is bit-packed and hands
   /// out a proxy reference instead of bool&, so the bool case is special.
   using reference =
      typename std::conditional_t<std::is_same_v<T, bool>, std::vector<bool>::reference, T &>;

   /// Const element access type. std::vector<bool>'s const accessors return a
   /// bool *value* (a temporary), so returning `const T &` would dangle for
   /// T = bool; return by value in that case instead.
   using const_reference =
      typename std::conditional_t<std::is_same_v<T, bool>, bool, const T &>;

   /// Constructor (no GPU allocation)
   /// @param size Initial number of (value-initialized) host elements.
   explicit DeviceVector(size_t size = 0) : hostData_(size), devicePtr_(nullptr), deviceSize_(0)
   {
   }

#if defined(__CUDACC__)
   /// Release any outstanding device allocation so a forgotten
   /// freeDeviceMemory() call cannot leak GPU memory.
   /// NOTE(review): if the same DeviceVector instantiation is destroyed in
   /// both nvcc- and host-compiled translation units, the destructors differ;
   /// confirm objects that allocate device memory live in CUDA-compiled TUs.
   ~DeviceVector()
   {
      freeDeviceMemory();
   }
#else
   ~DeviceVector() = default;
#endif

   /// Delete copy operations to prevent double-free of the device allocation.
   DeviceVector(const DeviceVector &) = delete;
   DeviceVector &operator=(const DeviceVector &) = delete;

   /// Add element to the end of the host vector.
   /// Does not touch device memory; reallocate and copy to publish the change.
   void push_back(const T &value)
   {
      hostData_.push_back(value);
   }

   /// Resize the host vector (new elements value-initialized).
   /// Device memory is NOT resized; call allocateDeviceMemory() afterwards
   /// before the next transfer.
   void resize(size_t new_size)
   {
      hostData_.resize(new_size);
   }

   /// Resize the host vector, filling new elements with `value`.
   void resize(size_t new_size, const T &value)
   {
      hostData_.resize(new_size, value);
   }

   /// Clear all host elements (device allocation is left untouched).
   void clear()
   {
      hostData_.clear();
   }

   /// Reserve host capacity.
   void reserve(size_t new_cap)
   {
      hostData_.reserve(new_cap);
   }

   /// Number of host elements.
   size_t size() const
   {
      return hostData_.size();
   }

   /// True when the host vector holds no elements.
   bool empty() const
   {
      return hostData_.empty();
   }

   /// Assign `value` to all of n elements (host side only).
   void assign(size_t n, const T &value)
   {
      hostData_.assign(n, value);
   }

   /// Get const reference to the host vector.
   const std::vector<T> &getHostVector() const
   {
      return hostData_;
   }

   /// Get mutable reference to the host vector.
   /// Returns a reference (not a copy, unlike the original draft) so that
   /// in-place edits are visible to subsequent copyToDevice() calls.
   std::vector<T> &getHostVector()
   {
      return hostData_;
   }

   /// Get pointer to device memory (nullptr until allocateDeviceMemory()).
   T *getDevicePointer()
   {
      return devicePtr_;
   }

   const T *getDevicePointer() const
   {
      return devicePtr_;
   }

   /// Implicit conversion to device pointer, for passing straight to kernels.
   operator T *()
   {
      return devicePtr_;
   }

   operator const T *() const
   {
      return devicePtr_;
   }

   /// Implicit conversion to the host vector.
   operator std::vector<T> &()
   {
      return hostData_;
   }

   operator const std::vector<T> &() const
   {
      return hostData_;
   }

   /// Element access operator that works with both bool and non-bool types.
   reference operator[](size_t idx)
   {
      return hostData_[idx];
   }

   /// Const element access operator (returns a value so the bool proxy never
   /// escapes).
   const T operator[](size_t idx) const
   {
      if constexpr (std::is_same_v<T, bool>)
         return static_cast<bool>(hostData_[idx]);   // ensure value, not proxy

      return hostData_[idx];   // normal types
   }

   /// Get pointer to contiguous host data.
   /// Not available for T = bool (std::vector<bool> has no data()).
   T *data()
   {
      return hostData_.data();
   }

   const T *data() const
   {
      return hostData_.data();
   }

   /// Safe element access with bounds checking (@throws std::out_of_range).
   /// Uses reference/const_reference so T = bool compiles and never dangles.
   reference at(size_t idx)
   {
      return hostData_.at(idx);
   }

   const_reference at(size_t idx) const
   {
      return hostData_.at(idx);
   }

   /// Access first element (undefined if empty, as with std::vector).
   reference front()
   {
      return hostData_.front();
   }

   const_reference front() const
   {
      return hostData_.front();
   }

   /// Access last element (undefined if empty, as with std::vector).
   reference back()
   {
      return hostData_.back();
   }

   const_reference back() const
   {
      return hostData_.back();
   }

   /// Iterator support for range-based loops (host data only).
   auto begin()
   {
      return hostData_.begin();
   }

   auto end()
   {
      return hostData_.end();
   }

   auto begin() const
   {
      return hostData_.begin();
   }

   auto end() const
   {
      return hostData_.end();
   }

#if defined(__CUDACC__)
public:
   /// Allocates device memory sized to the CURRENT host size.
   /// If device memory is already allocated, it is freed first. A zero-sized
   /// host vector leaves the device pointer null (cudaMalloc(0) is avoided).
   void allocateDeviceMemory()
   {
      if (devicePtr_)
         freeDeviceMemory();
      deviceSize_ = hostData_.size();
      if (deviceSize_ > 0)
         HANDLE_ERROR(cudaMalloc(&devicePtr_, deviceSize_ * sizeof(T)));
   }

   /// Frees the allocated device memory (no-op when none is allocated).
   void freeDeviceMemory()
   {
      if (devicePtr_) {
         HANDLE_ERROR(cudaFree(devicePtr_));
         devicePtr_ = nullptr;
      }
      deviceSize_ = 0;
   }

   /// Copy data from host to device memory.
   /// @throws std::runtime_error if device memory is not allocated or was
   ///         allocated for a different size (prevents device buffer overrun
   ///         after resize()).
   void copyToDevice()
   {
      const size_t n = hostData_.size();
      if (n == 0)
         return;   // nothing to transfer
      if (!devicePtr_)
         throw std::runtime_error("Device memory not allocated. Call allocateDeviceMemory()");
      if (n != deviceSize_)
         throw std::runtime_error(
            "Host/device size mismatch. Call allocateDeviceMemory() after resizing.");

      if constexpr (std::is_same_v<T, bool>) {
         // std::vector<bool> is bit-packed, so stage through a contiguous heap
         // buffer (a VLA here would be non-standard C++ and risk stack overflow).
         std::unique_ptr<bool[]> raw(new bool[n]);
         for (size_t i = 0; i < n; ++i) {
            raw[i] = hostData_[i];
         }
         HANDLE_ERROR(cudaMemcpy(devicePtr_, raw.get(), n * sizeof(bool), cudaMemcpyHostToDevice));
      } else {
         HANDLE_ERROR(
            cudaMemcpy(devicePtr_, hostData_.data(), n * sizeof(T), cudaMemcpyHostToDevice));
      }
   }

   /// Copy data from device to host memory.
   /// @throws std::runtime_error if device memory is not allocated or was
   ///         allocated for a different size.
   void copyToHost()
   {
      const size_t n = hostData_.size();
      if (n == 0)
         return;   // nothing to transfer
      if (!devicePtr_)
         throw std::runtime_error("Device memory not allocated.");
      if (n != deviceSize_)
         throw std::runtime_error(
            "Host/device size mismatch. Call allocateDeviceMemory() after resizing.");

      if constexpr (std::is_same_v<T, bool>) {
         // Stage through a heap buffer for the same bit-packing reason as above.
         std::unique_ptr<bool[]> raw(new bool[n]);
         HANDLE_ERROR(cudaMemcpy(raw.get(), devicePtr_, n * sizeof(bool), cudaMemcpyDeviceToHost));
         for (size_t i = 0; i < n; ++i) {
            hostData_[i] = raw[i];
         }
      } else {
         HANDLE_ERROR(
            cudaMemcpy(hostData_.data(), devicePtr_, n * sizeof(T), cudaMemcpyDeviceToHost));
      }
   }
#endif

private:
   std::vector<T> hostData_;   // Host-side vector
   T *devicePtr_;              // Device pointer (nullptr until allocated)
   size_t deviceSize_;         // Element count of the current device allocation
};

0 commit comments

Comments
 (0)