Skip to content

Commit b88b60f

Browse files
committed
add celerity blockchain for task divergence checking
1 parent 0822c32 commit b88b60f

16 files changed

+919
-5
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ set(SOURCES
187187
src/command_graph.cc
188188
src/config.cc
189189
src/device_queue.cc
190+
src/divergence_block_chain.cc
190191
src/executor.cc
191192
src/distributed_graph_generator.cc
192193
src/graph_serializer.cc

include/divergence_block_chain.h

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#pragma once
2+
3+
#include "recorders.h"
4+
#include <mutex>
5+
#include <thread>
6+
#include <vector>
7+
8+
namespace celerity::detail {
9+
/** @brief This class is a wrapper around a 1D vector that allows us to access it as a 2D array.
10+
*
11+
* It is used to send the task hashes to other nodes using MPI while keeping the code simple and readable.
12+
*/
13+
template <typename T>
14+
struct mpi_2d_send_wrapper {
15+
public:
16+
const T& operator[](std::pair<int, int> ij) const {
17+
assert(ij.first * m_width + ij.second < m_data.size());
18+
return m_data[ij.first * m_width + ij.second];
19+
}
20+
21+
T* data() { return m_data.data(); }
22+
23+
mpi_2d_send_wrapper(size_t width, size_t height) : m_data(width * height), m_width(width){};
24+
25+
private:
26+
std::vector<T> m_data;
27+
const size_t m_width;
28+
};
29+
30+
/** @brief This class gives a view into a const vector.
31+
*
32+
* It is used to give us the currently unhashed task records while keeping track of the offset and width.
33+
*/
34+
template <typename T>
35+
struct window {
36+
public:
37+
window(const std::vector<T>& value) : m_value(value) {}
38+
39+
const T& operator[](size_t i) const {
40+
assert(i >= 0 && i < m_width);
41+
return m_value[m_offset + i];
42+
}
43+
44+
size_t size() {
45+
m_width = m_value.size() - m_offset;
46+
return m_width;
47+
}
48+
49+
void slide(size_t i) {
50+
assert(i == 0 || (i >= 0 && i <= m_width));
51+
m_offset += i;
52+
m_width -= i;
53+
}
54+
55+
private:
56+
const std::vector<T>& m_value;
57+
size_t m_offset = 0;
58+
size_t m_width = 0;
59+
};
60+
61+
using task_hash = size_t;
62+
using task_hash_data = mpi_2d_send_wrapper<task_hash>;
63+
using divergence_map = std::unordered_map<task_hash, std::vector<node_id>>;
64+
65+
/** @brief This class is the base class for the divergence check.
66+
*
67+
* It is responsible for collecting the task hashes from all nodes and checking for differences -> divergence.
68+
* When a divergence is found, the task record for the diverging task is printed and the program is terminated.
69+
* Additionally it also checks for deadlocks and prints a warning if one is detected.
70+
*/
71+
class abstract_block_chain {
72+
friend struct abstract_block_chain_testspy;
73+
74+
public:
75+
virtual void stop() { m_is_running = false; };
76+
77+
abstract_block_chain(const abstract_block_chain&) = delete;
78+
abstract_block_chain& operator=(const abstract_block_chain&) = delete;
79+
abstract_block_chain& operator=(abstract_block_chain&&) = delete;
80+
81+
abstract_block_chain(abstract_block_chain&&) = default;
82+
83+
abstract_block_chain(size_t num_nodes, node_id local_nid, const std::vector<task_record>& task_recorder, MPI_Comm comm)
84+
: m_local_nid(local_nid), m_num_nodes(num_nodes), m_sizes(num_nodes), m_task_recorder_window(task_recorder), m_comm(comm) {}
85+
86+
virtual ~abstract_block_chain() = default;
87+
88+
protected:
89+
void start() { m_is_running = true; };
90+
91+
virtual void run() = 0;
92+
93+
virtual void divergence_out(const divergence_map& check_map, const int task_num) = 0;
94+
95+
void add_new_hashes();
96+
void clear(const int min_progress);
97+
virtual void allgather_sizes();
98+
virtual void allgather_hashes(const int max_size, task_hash_data& data);
99+
std::pair<int, int> collect_sizes();
100+
task_hash_data collect_hashes(const int max_size);
101+
divergence_map create_check_map(const task_hash_data& task_graphs, const int task_num) const;
102+
103+
void check_for_deadlock() const;
104+
105+
static void print_node_divergences(const divergence_map& check_map, const int task_num);
106+
107+
static void print_task_record(const divergence_map& check_map, const task_record& task, const task_hash hash);
108+
109+
virtual void dedub_print_task_record(const divergence_map& check_map, const int task_num) const;
110+
111+
bool check_for_divergence();
112+
113+
protected:
114+
node_id m_local_nid;
115+
size_t m_num_nodes;
116+
117+
std::vector<task_hash> m_hashes;
118+
std::vector<int> m_sizes;
119+
120+
bool m_is_running = true;
121+
122+
window<task_record> m_task_recorder_window;
123+
124+
std::chrono::time_point<std::chrono::steady_clock> m_last_cleared = std::chrono::steady_clock::now();
125+
126+
MPI_Comm m_comm;
127+
};
128+
129+
class divergence_block_chain : public abstract_block_chain {
130+
public:
131+
void start();
132+
void stop() override;
133+
134+
divergence_block_chain(size_t num_nodes, node_id local_nid, const std::vector<task_record>& task_record, MPI_Comm comm, bool test_mode = false)
135+
: abstract_block_chain(num_nodes, local_nid, task_record, comm), m_test_mode(test_mode) {
136+
divergence_block_chain::start();
137+
}
138+
139+
divergence_block_chain(const divergence_block_chain&) = delete;
140+
divergence_block_chain& operator=(const divergence_block_chain&) = delete;
141+
divergence_block_chain& operator=(divergence_block_chain&&) = delete;
142+
143+
divergence_block_chain(divergence_block_chain&&) = default;
144+
145+
~divergence_block_chain() override { divergence_block_chain::stop(); }
146+
147+
private:
148+
void run() override;
149+
150+
void divergence_out(const divergence_map& check_map, const int task_num) override;
151+
152+
private:
153+
std::thread m_thread;
154+
bool m_test_mode = false;
155+
};
156+
}; // namespace celerity::detail

include/grid.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <gch/small_vector.hpp>
99

1010
#include "ranges.h"
11+
#include "utils.h"
1112
#include "workaround.h"
1213

1314
namespace celerity::detail {
@@ -257,6 +258,27 @@ class region {
257258

258259
} // namespace celerity::detail
259260

261+
template <int Dims>
262+
struct std::hash<celerity::detail::box<Dims>> {
263+
std::size_t operator()(const celerity::detail::box<Dims> r) {
264+
std::size_t seed = 0;
265+
celerity::detail::utils::hash_combine(seed, std::hash<celerity::id<Dims>>{}(r.get_min()), std::hash<celerity::id<Dims>>{}(r.get_max()));
266+
return seed;
267+
};
268+
};
269+
270+
template <int Dims>
271+
struct std::hash<celerity::detail::region<Dims>> {
272+
std::size_t operator()(const celerity::detail::region<Dims> r) {
273+
std::size_t seed = 0;
274+
for(auto box : r.get_boxes()) {
275+
celerity::detail::utils::hash_combine(seed, std::hash<celerity::detail::box<Dims>>{}(box));
276+
}
277+
return seed;
278+
};
279+
};
280+
281+
260282
namespace celerity::detail::grid_detail {
261283

262284
// forward-declaration for tests (explicitly instantiated)

include/print_utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,4 @@ struct fmt::formatter<celerity::chunk<Dims>> : fmt::formatter<celerity::subrange
7070
out = formatter<celerity::id<Dims>>::format(celerity::id(chunk.global_size), ctx); // cast to id to avoid multiple inheritance
7171
return out;
7272
}
73-
};
73+
};

include/ranges.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include "sycl_wrappers.h"
4+
#include "utils.h"
45
#include "workaround.h"
56

67
namespace celerity {
@@ -229,6 +230,17 @@ struct ones_t {
229230

230231
}; // namespace celerity::detail
231232

233+
template <typename Interface, int Dims>
234+
struct std::hash<celerity::detail::coordinate<Interface, Dims>> {
235+
std::size_t operator()(const celerity::detail::coordinate<Interface, Dims>& r) const noexcept {
236+
std::size_t seed = 0;
237+
for(int i = 0; i < Dims; ++i) {
238+
celerity::detail::utils::hash_combine(seed, std::hash<int>{}(r[i]));
239+
}
240+
return seed;
241+
};
242+
};
243+
232244
namespace celerity {
233245

234246
template <int Dims>
@@ -401,6 +413,17 @@ nd_range(range<3> global_range, range<3> local_range)->nd_range<3>;
401413

402414
} // namespace celerity
403415

416+
417+
template <int Dims>
418+
struct std::hash<celerity::range<Dims>> {
419+
std::size_t operator()(const celerity::range<Dims>& r) const noexcept { return std::hash<celerity::detail::coordinate<celerity::range<Dims>, Dims>>{}(r); };
420+
};
421+
422+
template <int Dims>
423+
struct std::hash<celerity::id<Dims>> {
424+
std::size_t operator()(const celerity::id<Dims>& r) const noexcept { return std::hash<celerity::detail::coordinate<celerity::id<Dims>, Dims>>{}(r); };
425+
};
426+
404427
namespace celerity {
405428
namespace detail {
406429

include/recorders.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,85 @@ class command_recorder {
114114
};
115115

116116
} // namespace celerity::detail
117+
118+
template <>
119+
struct std::hash<celerity::detail::reduction_record> {
120+
std::size_t operator()(const celerity::detail::reduction_record& r) const noexcept {
121+
std::size_t seed = 0;
122+
celerity::detail::utils::hash_combine(seed, std::hash<celerity::detail::reduction_id>{}(r.rid), std::hash<celerity::detail::buffer_id>{}(r.bid),
123+
std::hash<std::string>{}(r.buffer_name), std::hash<bool>{}(r.init_from_buffer));
124+
return seed;
125+
};
126+
};
127+
128+
template <>
129+
struct std::hash<celerity::detail::access_record> {
130+
std::size_t operator()(const celerity::detail::access_record& r) {
131+
std::size_t seed = 0;
132+
celerity::detail::utils::hash_combine(seed, std::hash<celerity::detail::buffer_id>{}(r.bid), std::hash<std::string>{}(r.buffer_name),
133+
std::hash<celerity::access_mode>{}(r.mode), std::hash<celerity::detail::region<3>>{}(r.req));
134+
return seed;
135+
};
136+
};
137+
138+
template <typename IdType>
139+
struct std::hash<celerity::detail::dependency_record<IdType>> {
140+
std::size_t operator()(const celerity::detail::dependency_record<IdType>& r) const noexcept {
141+
std::size_t seed = 0;
142+
celerity::detail::utils::hash_combine(seed, std::hash<IdType>{}(r.node), std::hash<celerity::detail::dependency_kind>{}(r.kind),
143+
std::hash<celerity::detail::dependency_origin>{}(r.origin));
144+
return seed;
145+
};
146+
};
147+
148+
template <>
149+
struct std::hash<celerity::detail::side_effect_map> {
150+
std::size_t operator()(const celerity::detail::side_effect_map& m) const noexcept {
151+
std::size_t seed = 0;
152+
for(auto& [hoid, order] : m) {
153+
celerity::detail::utils::hash_combine(
154+
seed, std::hash<celerity::detail::host_object_id>{}(hoid), std::hash<celerity::experimental::side_effect_order>{}(order));
155+
}
156+
return seed;
157+
};
158+
};
159+
160+
template <>
161+
struct std::hash<celerity::detail::task_record> {
162+
std::size_t operator()(const celerity::detail::task_record& t) const noexcept {
163+
std::size_t seed = 0;
164+
celerity::detail::utils::hash_combine(seed, std::hash<celerity::detail::task_id>{}(t.tid), std::hash<std::string>{}(t.debug_name),
165+
std::hash<celerity::detail::collective_group_id>{}(t.cgid), std::hash<celerity::detail::task_type>{}(t.type),
166+
std::hash<celerity::detail::task_geometry>{}(t.geometry), celerity::detail::utils::vector_hash{}(t.reductions),
167+
celerity::detail::utils::vector_hash{}(t.accesses), std::hash<celerity::detail::side_effect_map>{}(t.side_effect_map),
168+
celerity::detail::utils::vector_hash{}(t.dependencies));
169+
170+
return seed;
171+
};
172+
};
173+
174+
template <>
175+
struct fmt::formatter<celerity::detail::dependency_kind> : fmt::formatter<std::string> {
176+
static format_context::iterator format(const celerity::detail::dependency_kind& dk, format_context& ctx) {
177+
auto out = ctx.out();
178+
switch(dk) {
179+
case celerity::detail::dependency_kind::anti_dep: out = std::copy_n("anti-dep", 8, out); break;
180+
case celerity::detail::dependency_kind::true_dep: out = std::copy_n("true-dep", 8, out); break;
181+
}
182+
return out;
183+
}
184+
};
185+
186+
template <>
187+
struct fmt::formatter<celerity::detail::dependency_origin> : fmt::formatter<std::string> {
188+
static format_context::iterator format(const celerity::detail::dependency_origin& dk, format_context& ctx) {
189+
auto out = ctx.out();
190+
switch(dk) {
191+
case celerity::detail::dependency_origin::dataflow: out = std::copy_n("dataflow", 8, out); break;
192+
case celerity::detail::dependency_origin::collective_group_serialization: out = std::copy_n("collective-group-serialization", 31, out); break;
193+
case celerity::detail::dependency_origin::execution_front: out = std::copy_n("execution-front", 15, out); break;
194+
case celerity::detail::dependency_origin::last_epoch: out = std::copy_n("last-epoch", 10, out); break;
195+
}
196+
return out;
197+
}
198+
};

include/runtime.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "command.h"
88
#include "config.h"
99
#include "device_queue.h"
10+
#include "divergence_block_chain.h"
1011
#include "frame.h"
1112
#include "host_queue.h"
1213
#include "recorders.h"
@@ -101,6 +102,8 @@ namespace detail {
101102
size_t m_num_nodes;
102103
node_id m_local_nid;
103104

105+
std::unique_ptr<abstract_block_chain> m_divergence_check;
106+
104107
// These management classes are only constructed on the master node.
105108
std::unique_ptr<command_graph> m_cdag;
106109
std::unique_ptr<scheduler> m_schdlr;

include/task.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "lifetime_extending_state.h"
1414
#include "range_mapper.h"
1515
#include "types.h"
16+
#include "utils.h"
1617

1718
namespace celerity {
1819

@@ -273,3 +274,31 @@ namespace detail {
273274

274275
} // namespace detail
275276
} // namespace celerity
277+
278+
template <>
279+
struct std::hash<celerity::detail::task_geometry> {
280+
std::size_t operator()(const celerity::detail::task_geometry& g) const noexcept {
281+
std::size_t seed = 0;
282+
celerity::detail::utils::hash_combine(seed, std::hash<int>{}(g.dimensions), std::hash<celerity::range<3>>{}(g.global_size),
283+
std::hash<celerity::id<3>>{}(g.global_offset), std::hash<celerity::range<3>>{}(g.granularity));
284+
return seed;
285+
};
286+
};
287+
288+
template <>
289+
struct fmt::formatter<celerity::detail::task_type> : fmt::formatter<std::string> {
290+
static format_context::iterator format(const celerity::detail::task_type& tt, format_context& ctx) {
291+
auto out = ctx.out();
292+
switch(tt) {
293+
case celerity::detail::task_type::epoch: out = std::copy_n("epoch", 5, out); break;
294+
case celerity::detail::task_type::host_compute: out = std::copy_n("host-compute", 12, out); break;
295+
case celerity::detail::task_type::device_compute: out = std::copy_n("device-compute", 14, out); break;
296+
case celerity::detail::task_type::collective: out = std::copy_n("collective", 10, out); break;
297+
case celerity::detail::task_type::master_node: out = std::copy_n("master-node", 11, out); break;
298+
case celerity::detail::task_type::horizon: out = std::copy_n("horizon", 7, out); break;
299+
case celerity::detail::task_type::fence: out = std::copy_n("fence", 5, out); break;
300+
default: out = std::copy_n("unknown", 7, out); break;
301+
}
302+
return out;
303+
}
304+
};

0 commit comments

Comments
 (0)