@@ -1110,6 +1110,9 @@ class ZeCollector {
1110
1110
std::vector<std::string> knames;
1111
1111
size_t max_name_size = 0 ;
1112
1112
global_device_time_stats_mutex_.lock ();
1113
+
1114
+ AggregateDeviceTimeStats ();
1115
+
1113
1116
std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list (
1114
1117
global_device_time_stats_->begin (), global_device_time_stats_->end ());
1115
1118
@@ -1197,6 +1200,9 @@ class ZeCollector {
1197
1200
std::vector<std::string> knames;
1198
1201
size_t max_name_size = 0 ;
1199
1202
global_device_time_stats_mutex_.lock ();
1203
+
1204
+ AggregateDeviceTimeStats ();
1205
+
1200
1206
std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list (
1201
1207
global_device_time_stats_->begin (), global_device_time_stats_->end ());
1202
1208
@@ -3980,6 +3986,49 @@ class ZeCollector {
3980
3986
local_device_submissions_.CollectHostFunctionTimeStats (id, time );
3981
3987
}
3982
3988
3989
+ void AggregateDeviceTimeStats () const {
3990
+ // do not acquire global_device_time_stats_mutex_. caller dos it.
3991
+ for (auto it = global_device_time_stats_->begin (); it != global_device_time_stats_->end (); it++) {
3992
+ std::string kname;
3993
+ if (it->first .tile_ >= 0 ) {
3994
+ kname = " Tile #" + std::to_string (it->first .tile_ ) + " : " + GetZeKernelCommandName (it->first .kernel_command_id_ , it->first .group_count_ , it->first .mem_size_ , options_.verbose );
3995
+ }
3996
+ else {
3997
+ kname = GetZeKernelCommandName (it->first .kernel_command_id_ , it->first .group_count_ , it->first .mem_size_ , options_.verbose );
3998
+ }
3999
+
4000
+ auto it2 = it;
4001
+ it2++;
4002
+
4003
+ for (; it2 != global_device_time_stats_->end ();) {
4004
+ std::string kname2;
4005
+ if (it2->first .tile_ >= 0 ) {
4006
+ kname2 = " Tile #" + std::to_string (it2->first .tile_ ) + " : " + GetZeKernelCommandName (it2->first .kernel_command_id_ , it2->first .group_count_ , it2->first .mem_size_ , options_.verbose );
4007
+ }
4008
+ else {
4009
+ kname2 = GetZeKernelCommandName (it2->first .kernel_command_id_ , it2->first .group_count_ , it2->first .mem_size_ , options_.verbose );
4010
+ }
4011
+
4012
+ if (kname2 == kname) {
4013
+ it->second .append_time_ += it2->second .append_time_ ;
4014
+ it->second .submit_time_ += it2->second .submit_time_ ;
4015
+ it->second .execute_time_ += it2->second .execute_time_ ;
4016
+ if (it->second .min_time_ > it2->second .min_time_ ) {
4017
+ it->second .min_time_ = it2->second .min_time_ ;
4018
+ }
4019
+ if (it->second .max_time_ < it2->second .max_time_ ) {
4020
+ it->second .max_time_ = it2->second .max_time_ ;
4021
+ }
4022
+ it->second .call_count_ += it2->second .call_count_ ;
4023
+ it2 = global_device_time_stats_->erase (it2);
4024
+ }
4025
+ else {
4026
+ it2++;
4027
+ }
4028
+ }
4029
+ }
4030
+ }
4031
+
3983
4032
private: // Data
3984
4033
zel_tracer_handle_t tracer_ = nullptr ;
3985
4034
CollectorOptions options_;
0 commit comments