Skip to content

Commit 132e274

Browse files
authored
Aggregating device timing by kernel or kernel + shape in device timing summary (#39)
1 parent 6a85c1d commit 132e274

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

tools/unitrace/src/levelzero/ze_collector.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,9 @@ class ZeCollector {
11101110
std::vector<std::string> knames;
11111111
size_t max_name_size = 0;
11121112
global_device_time_stats_mutex_.lock();
1113+
1114+
AggregateDeviceTimeStats();
1115+
11131116
std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list(
11141117
global_device_time_stats_->begin(), global_device_time_stats_->end());
11151118

@@ -1197,6 +1200,9 @@ class ZeCollector {
11971200
std::vector<std::string> knames;
11981201
size_t max_name_size = 0;
11991202
global_device_time_stats_mutex_.lock();
1203+
1204+
AggregateDeviceTimeStats();
1205+
12001206
std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list(
12011207
global_device_time_stats_->begin(), global_device_time_stats_->end());
12021208

@@ -3980,6 +3986,49 @@ class ZeCollector {
39803986
local_device_submissions_.CollectHostFunctionTimeStats(id, time);
39813987
}
39823988

3989+
void AggregateDeviceTimeStats() const {
3990+
// do not acquire global_device_time_stats_mutex_. caller dos it.
3991+
for (auto it = global_device_time_stats_->begin(); it != global_device_time_stats_->end(); it++) {
3992+
std::string kname;
3993+
if (it->first.tile_ >= 0) {
3994+
kname = "Tile #" + std::to_string(it->first.tile_) + ": " + GetZeKernelCommandName(it->first.kernel_command_id_, it->first.group_count_, it->first.mem_size_, options_.verbose);
3995+
}
3996+
else {
3997+
kname = GetZeKernelCommandName(it->first.kernel_command_id_, it->first.group_count_, it->first.mem_size_, options_.verbose);
3998+
}
3999+
4000+
auto it2 = it;
4001+
it2++;
4002+
4003+
for (; it2 != global_device_time_stats_->end();) {
4004+
std::string kname2;
4005+
if (it2->first.tile_ >= 0) {
4006+
kname2 = "Tile #" + std::to_string(it2->first.tile_) + ": " + GetZeKernelCommandName(it2->first.kernel_command_id_, it2->first.group_count_, it2->first.mem_size_, options_.verbose);
4007+
}
4008+
else {
4009+
kname2 = GetZeKernelCommandName(it2->first.kernel_command_id_, it2->first.group_count_, it2->first.mem_size_, options_.verbose);
4010+
}
4011+
4012+
if (kname2 == kname) {
4013+
it->second.append_time_ += it2->second.append_time_;
4014+
it->second.submit_time_ += it2->second.submit_time_;
4015+
it->second.execute_time_ += it2->second.execute_time_;
4016+
if (it->second.min_time_ > it2->second.min_time_) {
4017+
it->second.min_time_ = it2->second.min_time_;
4018+
}
4019+
if (it->second.max_time_ < it2->second.max_time_) {
4020+
it->second.max_time_ = it2->second.max_time_;
4021+
}
4022+
it->second.call_count_ += it2->second.call_count_;
4023+
it2 = global_device_time_stats_->erase(it2);
4024+
}
4025+
else {
4026+
it2++;
4027+
}
4028+
}
4029+
}
4030+
}
4031+
39834032
private: // Data
39844033
zel_tracer_handle_t tracer_ = nullptr;
39854034
CollectorOptions options_;

0 commit comments

Comments
 (0)