@@ -31,9 +31,8 @@ struct load_store_attr_t {
31
31
static constexpr bool has_hw_block_2d = false ;
32
32
};
33
33
34
- template <>
35
- struct load_store_attr_t <msg_type::block_2d, gpu_arch::XeHpc> {
36
- // / HW limitation checks https://gfxspecs.intel.com/Predator/Home/Index/55490
34
+ template <msg_type message_type, gpu_arch arg_tag>
35
+ struct xe_plus_load_store_attr_t {
37
36
static constexpr bool has_hw_block_2d = true ;
38
37
static constexpr uint32_t max_load_height_in_elem = 32 ;
39
38
static constexpr uint32_t max_load_width_in_bytes = 64 ;
@@ -55,10 +54,9 @@ struct load_store_attr_t<msg_type::block_2d, gpu_arch::XeHpc> {
55
54
56
55
template <msg_type message_type, gpu_arch arg_tag>
57
56
struct client_load_store_attr_base_t {
58
- // / HW limitation checks https://gfxspecs.intel.com/Predator/Home/Index/55490
59
57
static constexpr bool has_hw_block_2d = false ;
60
- static constexpr uint32_t max_load_height_in_elem = 32 ;
61
- static constexpr uint32_t max_load_width_in_bytes = 64 ;
58
+ static constexpr uint32_t max_load_height_in_elem = 0 ;
59
+ static constexpr uint32_t max_load_width_in_bytes = 0 ;
62
60
static constexpr uint32_t max_trans_load_width_in_bytes = 32 ;
63
61
static constexpr uint32_t max_vnni_load_width_in_elems = 16 ;
64
62
static constexpr uint32_t min_vnni_load_height_in_bytes = 4 ;
@@ -87,6 +85,18 @@ struct load_store_attr_t<msg_type::block_2d, gpu_arch::XeLpg>
87
85
msg_type::block_2d,
88
86
gpu_arch::XeLpg> {};
89
87
88
+ template <>
89
+ struct load_store_attr_t <msg_type::block_2d, gpu_arch::XeHpc>
90
+ : public client_load_store_attr_base_t <
91
+ msg_type::block_2d,
92
+ gpu_arch::XeHpc> {};
93
+
94
+ template <>
95
+ struct load_store_attr_t <msg_type::block_2d, gpu_arch::Xe2>
96
+ : public client_load_store_attr_base_t <
97
+ msg_type::block_2d,
98
+ gpu_arch::Xe2> {};
99
+
90
100
template <gpu_arch arch_tag>
91
101
inline constexpr bool arch_has_2d_load_store =
92
102
load_store_attr_t <msg_type::block_2d, arch_tag>::has_hw_block_2d;
@@ -105,6 +115,13 @@ struct load_store_attr_t<msg_type::block_1d, gpu_arch::XeHpc> {
105
115
static constexpr uint32_t max_prefetch_vec_len = 64 ;
106
116
};
107
117
118
+ template <>
119
+ struct load_store_attr_t <msg_type::block_1d, gpu_arch::Xe2> {
120
+ static constexpr uint32_t max_load_vec_len = 512 ;
121
+ static constexpr uint32_t max_store_vec_len = 512 ;
122
+ static constexpr uint32_t max_prefetch_vec_len = 64 ;
123
+ };
124
+
108
125
struct dpas_attr_base_t {
109
126
static constexpr bool has_xmx = true ;
110
127
static constexpr uint32_t systolic_depth = 8 ;
@@ -129,6 +146,11 @@ struct dpas_attr_t<gpu_arch::XeHpg> : public dpas_attr_base_t {
129
146
static constexpr uint32_t n_fixed_limit = 8 ;
130
147
};
131
148
149
+ template <>
150
+ struct dpas_attr_t <gpu_arch::Xe2> : public dpas_attr_t <gpu_arch::XeHpc> {
151
+ static constexpr uint32_t systolic_depth = 4 ;
152
+ };
153
+
132
154
template <gpu_arch arch_tag>
133
155
inline constexpr bool arch_has_xmx = dpas_attr_t <arch_tag>::has_xmx;
134
156
@@ -162,6 +184,10 @@ template <>
162
184
struct register_bytes_t <gpu_arch::XeLpg> {
163
185
static constexpr uint32_t reg_in_bytes = 32 ;
164
186
};
187
+ template <>
188
+ struct register_bytes_t <gpu_arch::Xe2> {
189
+ static constexpr uint32_t reg_in_bytes = 64 ;
190
+ };
165
191
166
192
template <grf_mode grf_num_mode, gpu_arch arch_tag>
167
193
struct register_attr_t {
@@ -236,10 +262,25 @@ struct arch_attr_t<gpu_arch::XeLpg> {
236
262
237
263
using dpas_attr = dpas_attr_t <gpu_arch::XeLpg>;
238
264
239
- static constexpr uint32_t max_wg_num = 64 ;
265
+ static constexpr uint32_t max_wg_num = 16 ;
240
266
static constexpr uint32_t local_mem_size = 64 * 1024 ;
241
267
};
242
268
269
+ template <>
270
+ struct arch_attr_t <gpu_arch::Xe2> {
271
+ template <msg_type message_type = msg_type::block_2d>
272
+ using load_store_attr = load_store_attr_t <message_type, gpu_arch::Xe2>;
273
+
274
+ template <grf_mode grf_num_mode = grf_mode::double_grf>
275
+ using register_attr = register_attr_t <grf_num_mode, gpu_arch::Xe2>;
276
+
277
+ using dpas_attr = dpas_attr_t <gpu_arch::Xe2>;
278
+
279
+ static constexpr uint32_t max_wg_num = 16 ;
280
+ static constexpr uint32_t local_mem_size = 128 * 1024 ;
281
+ };
282
+
283
+
243
284
// / @} xetla_core_arch_config
244
285
245
286
} // namespace gpu::xetla
0 commit comments