Skip to content

Commit 13c73e8

Browse files
Update modeling_llama4.py (huggingface#37841)
* Update modeling_llama4.py

* Update modeling_llama4.py

* do not pass device

---------

Co-authored-by: raushan <raushan@huggingface.co>
1 parent c3012a2 commit 13c73e8

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

src/transformers/models/llama4/modeling_llama4.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -873,7 +873,6 @@ def _prepare_4d_causal_attention_mask_with_cache_position(
 873  873          sequence_length: int,
 874  874          target_length: int,
 875  875          dtype: torch.dtype,
 876     -        device: torch.device,
 877  876          cache_position: torch.Tensor,
 878  877          batch_size: int,
 879  878          **kwargs,
@@ -906,16 +905,18 @@ def _prepare_4d_causal_attention_mask_with_cache_position(
 906  905      else:
 907  906          min_dtype = torch.finfo(dtype).min
 908  907          causal_mask = torch.full(
 909     -            (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device
      908 +            (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=cache_position.device
 910  909          )
 911  910          if sequence_length != 1:
 912  911              causal_mask = torch.triu(causal_mask, diagonal=1)
 913     -        causal_mask *= torch.arange(target_length, device=device) > cache_position.to(device).reshape(-1, 1)
      912 +        causal_mask *= torch.arange(target_length, device=cache_position.device) > cache_position.reshape(-1, 1)
 914  913          causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)
 915  914          if attention_mask is not None:
 916  915              causal_mask = causal_mask.clone()  # copy to contiguous memory for in-place edit
 917  916              mask_length = attention_mask.shape[-1]
 918     -            padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(device)
      917 +            padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
      918 +                cache_position.device
      919 +            )
 919  920              padding_mask = padding_mask == 0
 920  921              causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
 921  922                  padding_mask, min_dtype

0 commit comments

Comments (0)