Skip to content

Commit 30ea9ea

Browse files
authored
Merge branch 'dev' into migrate-dynamic-map
2 parents 4b1db05 + 0b214c6 commit 30ea9ea

File tree

3 files changed

+123
-16
lines changed

3 files changed

+123
-16
lines changed

include/cuco/bucket_storage.cuh

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2026, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -21,15 +21,16 @@
2121
#include <cuco/utility/allocator.hpp>
2222

2323
#include <cuda/std/array>
24+
#include <cuda/std/bit>
2425
#include <cuda/std/functional>
2526
#include <cuda/stream_ref>
2627

2728
#include <cstddef>
2829
#include <cstdint>
29-
#include <iterator>
3030
#include <memory>
3131

3232
namespace cuco {
33+
3334
/**
3435
* @brief Non-owning array of slots storage reference type.
3536
*
@@ -40,14 +41,18 @@ namespace cuco {
4041
template <typename T, int32_t BucketSize, typename Extent = cuco::extent<std::size_t>>
4142
class bucket_storage_ref {
4243
public:
43-
static constexpr int32_t bucket_size = BucketSize; ///< Number of elements processed per bucket
44+
static constexpr int32_t bucket_size = BucketSize; ///< Number of elements per bucket
45+
static constexpr std::size_t max_vector_load_bytes = 16; ///< Maximum vector load width in bytes
46+
47+
using bucket_type = cuda::std::array<T, BucketSize>; ///< Slot bucket type
48+
4449
static constexpr std::size_t alignment =
45-
cuda::std::min(sizeof(T) * bucket_size, std::size_t{16}); ///< Required alignment
50+
cuda::std::min(cuda::std::bit_ceil(sizeof(bucket_type)),
51+
max_vector_load_bytes); ///< Required alignment in bytes
4652

4753
using extent_type = Extent; ///< Storage extent type
4854
using size_type = typename extent_type::value_type; ///< Storage size type
4955
using value_type = T; ///< Slot type
50-
using bucket_type = cuda::std::array<T, BucketSize>; ///< Slot bucket type
5156

5257
/**
5358
* @brief Constructor of slot storage ref.
@@ -150,7 +155,7 @@ class bucket_storage {
150155
using value_type = T; ///< Slot type
151156
using bucket_type = cuda::std::array<T, BucketSize>; ///< Slot bucket type
152157

153-
/// Type of the allocator to (de)allocate buckets
158+
/// Type of the allocator to (de)allocate slots
154159
using allocator_type =
155160
typename std::allocator_traits<Allocator>::template rebind_alloc<value_type>;
156161
using ref_type = bucket_storage_ref<value_type, bucket_size, extent_type>; ///< Storage ref type
@@ -244,13 +249,19 @@ class bucket_storage {
244249
[[nodiscard]] __host__ __device__ constexpr extent_type extent() const noexcept;
245250

246251
private:
247-
using slot_deleter_type =
248-
detail::custom_deleter<size_type, allocator_type>; ///< Type of slot deleter
252+
struct aligned_deleter {
253+
value_type* raw_ptr_;
254+
std::size_t size_;
255+
allocator_type& allocator_;
256+
cuda::stream_ref stream_;
257+
258+
void operator()(value_type*) const { allocator_.deallocate(raw_ptr_, size_, stream_); }
259+
};
249260

250261
extent_type extent_; ///< Storage extent
251262
allocator_type allocator_; ///< Allocator used to (de)allocate slots
252-
/// Pointer to the slot storage
253-
std::unique_ptr<value_type, slot_deleter_type> slots_;
263+
/// Pointer to the aligned slot storage
264+
std::unique_ptr<value_type, aligned_deleter> slots_;
254265
};
255266
} // namespace cuco
256267

include/cuco/detail/storage/bucket_storage.inl

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2026, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -23,9 +23,11 @@
2323
#include <cub/device/device_for.cuh>
2424
#include <cuda/std/array>
2525
#include <cuda/std/bit>
26+
#include <cuda/std/cstdint>
2627
#include <cuda/stream_ref>
2728

2829
#include <cassert>
30+
#include <memory>
2931

3032
namespace cuco {
3133

@@ -98,10 +100,16 @@ bucket_storage_ref<T, BucketSize, Extent>::extent() const noexcept
98100
template <typename T, int BucketSize, typename Extent, typename Allocator>
99101
constexpr bucket_storage<T, BucketSize, Extent, Allocator>::bucket_storage(
100102
Extent size, Allocator const& allocator, cuda::stream_ref stream)
101-
: extent_{size},
102-
allocator_{allocator},
103-
slots_{allocator_.allocate(capacity(), stream),
104-
slot_deleter_type{capacity(), allocator_, stream}}
103+
: extent_{size}, allocator_{allocator}, slots_{[this, &stream]() {
104+
constexpr std::size_t align = ref_type::alignment;
105+
constexpr std::size_t extra = (align - 1) / sizeof(value_type) + 1;
106+
std::size_t const alloc_size = static_cast<std::size_t>(capacity()) + extra;
107+
auto* const raw_ptr = allocator_.allocate(alloc_size, stream);
108+
auto* const aligned_ptr = reinterpret_cast<value_type*>(
109+
(reinterpret_cast<cuda::std::uintptr_t>(raw_ptr) + align - 1) & ~(align - 1));
110+
return std::unique_ptr<value_type, aligned_deleter>{
111+
aligned_ptr, aligned_deleter{raw_ptr, alloc_size, allocator_, stream}};
112+
}()}
105113
{
106114
}
107115

tests/utility/storage_test.cu

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2026, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -21,8 +21,12 @@
2121
#include <cuco/pair.cuh>
2222
#include <cuco/utility/allocator.hpp>
2323

24+
#include <cuda/std/bit>
25+
2426
#include <catch2/catch_template_test_macros.hpp>
2527

28+
#include <cstdint>
29+
2630
TEMPLATE_TEST_CASE_SIG("utility storage tests",
2731
"",
2832
((typename Key, typename Value), Key, Value),
@@ -96,4 +100,88 @@ TEMPLATE_TEST_CASE_SIG("utility storage tests",
96100
STATIC_REQUIRE(num_buckets == size / bucket_size);
97101
STATIC_REQUIRE(capacity == gold_capacity);
98102
}
103+
104+
SECTION("Storage alignment constant is correct for pairs.")
105+
{
106+
using storage_ref_type =
107+
cuco::bucket_storage_ref<cuco::pair<Key, Value>, bucket_size, cuco::extent<std::size_t>>;
108+
using bucket_type = typename storage_ref_type::bucket_type;
109+
110+
constexpr auto alignment = storage_ref_type::alignment;
111+
constexpr auto expected_align = cuda::std::min(cuda::std::bit_ceil(sizeof(bucket_type)),
112+
storage_ref_type::max_vector_load_bytes);
113+
114+
STATIC_REQUIRE(alignment == expected_align);
115+
STATIC_REQUIRE(cuda::std::has_single_bit(alignment));
116+
}
117+
118+
SECTION("Storage alignment constant is correct for keys.")
119+
{
120+
using storage_ref_type = cuco::bucket_storage_ref<Key, bucket_size, cuco::extent<std::size_t>>;
121+
using bucket_type = typename storage_ref_type::bucket_type;
122+
123+
constexpr auto alignment = storage_ref_type::alignment;
124+
constexpr auto expected_align = cuda::std::min(cuda::std::bit_ceil(sizeof(bucket_type)),
125+
storage_ref_type::max_vector_load_bytes);
126+
127+
STATIC_REQUIRE(alignment == expected_align);
128+
STATIC_REQUIRE(cuda::std::has_single_bit(alignment));
129+
}
130+
131+
SECTION("Storage data pointer is aligned to bucket boundary for pairs.")
132+
{
133+
auto s = cuco::bucket_storage<cuco::pair<Key, Value>,
134+
bucket_size,
135+
cuco::extent<std::size_t>,
136+
allocator_type>(
137+
cuco::extent{size}, allocator, cuda::stream_ref{cudaStream_t{nullptr}});
138+
139+
auto const ptr = reinterpret_cast<std::uintptr_t>(s.data());
140+
auto const alignment = decltype(s)::ref_type::alignment;
141+
142+
REQUIRE((ptr % alignment) == 0);
143+
}
144+
145+
SECTION("Storage data pointer is aligned to bucket boundary for keys.")
146+
{
147+
auto s = cuco::bucket_storage<Key, bucket_size, cuco::extent<std::size_t>, allocator_type>(
148+
cuco::extent{size}, allocator, cuda::stream_ref{cudaStream_t{nullptr}});
149+
150+
auto const ptr = reinterpret_cast<std::uintptr_t>(s.data());
151+
auto const alignment = decltype(s)::ref_type::alignment;
152+
153+
REQUIRE((ptr % alignment) == 0);
154+
}
155+
}
156+
157+
TEMPLATE_TEST_CASE_SIG("bucket storage alignment with different bucket sizes",
158+
"",
159+
((typename T, int BucketSize), T, BucketSize),
160+
(int32_t, 1),
161+
(int32_t, 2),
162+
(int32_t, 4),
163+
(int64_t, 1),
164+
(int64_t, 2),
165+
(cuco::pair<int32_t, int32_t>, 1),
166+
(cuco::pair<int32_t, int32_t>, 2),
167+
(cuco::pair<int64_t, int64_t>, 1))
168+
{
169+
constexpr std::size_t size{1'000};
170+
171+
using allocator_type = cuco::cuda_allocator<char>;
172+
using storage_type =
173+
cuco::bucket_storage<T, BucketSize, cuco::extent<std::size_t>, allocator_type>;
174+
using storage_ref_type = typename storage_type::ref_type;
175+
176+
auto allocator = allocator_type{};
177+
178+
SECTION("Data pointer is aligned to bucket boundary.")
179+
{
180+
auto s = storage_type(cuco::extent{size}, allocator, cuda::stream_ref{cudaStream_t{nullptr}});
181+
182+
auto const ptr = reinterpret_cast<std::uintptr_t>(s.data());
183+
auto const alignment = storage_ref_type::alignment;
184+
185+
REQUIRE((ptr % alignment) == 0);
186+
}
99187
}

0 commit comments

Comments
 (0)