@@ -186,10 +186,10 @@ def finalize(self, hidden_states: torch.Tensor,
186
186
self .moe_config .tp_group .device_group )
187
187
hidden_states = torch .cat (self .split_hidden_states , dim = 0 )
188
188
189
- # TODO: It is a quick bugfix for the single-operator memory explosion issue
190
- # that requires further restructuring.
191
- # If the cache is not cleared after `self.split_hidden_states` is created,
192
- # it can lead to the single-operator memory explosion.
189
+ # TODO: This is a quick bugfix for the memory explosion issue in eager mode
190
+ # that requires further restructuring.
191
+ # If the cache is not cleared after `self.split_hidden_states` is created,
192
+ # it can lead to a memory explosion in eager mode.
193
193
del self .split_hidden_states
194
194
195
195
# Unpad if necessary
@@ -276,10 +276,10 @@ def finalize(self, hidden_states: torch.Tensor,
276
276
self .moe_config .tp_group .device_group )
277
277
hidden_states = torch .cat (self .split_hidden_states , dim = 0 )
278
278
279
- # TODO: It is a quick bugfix for the single-operator memory explosion issue
280
- # that requires further restructuring.
281
- # If the cache is not cleared after `self.split_hidden_states` is created,
282
- # it can lead to the single-operator memory explosion.
279
+ # TODO: This is a quick bugfix for the memory explosion issue in eager mode
280
+ # that requires further restructuring.
281
+ # If the cache is not cleared after `self.split_hidden_states` is created,
282
+ # it can lead to a memory explosion in eager mode.
283
283
del self .split_hidden_states
284
284
285
285
if self .num_tokens < hidden_states .shape [0 ]:
0 commit comments