Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class Cohere2VisionCausalLMOutputWithPast(ModelOutput):
@auto_docstring
class Cohere2VisionPreTrainedModel(PreTrainedModel):
config: Cohere2VisionConfig
- base_model_prefix = ""
+ base_model_prefix = "model"
input_modalities = ["image", "text"]
supports_gradient_checkpointing = True
_skip_keys_device_placement = "past_key_values"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
AyaVisionForConditionalGeneration,
AyaVisionModel,
AyaVisionModelOutputWithPast,
+ AyaVisionPreTrainedModel,
)
from transformers.models.got_ocr2.image_processing_got_ocr2_fast import GotOcr2ImageProcessorFast

Expand Down Expand Up @@ -89,6 +90,10 @@ class Cohere2VisionCausalLMOutputWithPast(AyaVisionCausalLMOutputWithPast):
pass


+ class Cohere2VisionPreTrainedModel(AyaVisionPreTrainedModel):
+ base_model_prefix = "model"


class Cohere2VisionModel(AyaVisionModel):
_checkpoint_conversion_mapping = {}

Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/florence2/modeling_florence2.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ class Florence2Seq2SeqLMOutput(Seq2SeqLMOutput):
@auto_docstring
class Florence2PreTrainedModel(PreTrainedModel):
config: Florence2Config
- base_model_prefix = ""
+ base_model_prefix = "model"
input_modalities = ["image", "text"]
supports_gradient_checkpointing = True
_skip_keys_device_placement = "past_key_values"
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/florence2/modular_florence2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1500,6 +1500,7 @@ class Florence2Seq2SeqLMOutput(Seq2SeqLMOutput):
@auto_docstring
class Florence2PreTrainedModel(LlavaPreTrainedModel):
config_class = Florence2Config
+ base_model_prefix = "model"

_supports_attention_backend = False

Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/gemma3n/modeling_gemma3n.py
Original file line number Diff line number Diff line change
Expand Up @@ -1584,7 +1584,7 @@ def forward(
@auto_docstring
class Gemma3nPreTrainedModel(PreTrainedModel):
config: Gemma3nConfig
- base_model_prefix = ""
+ base_model_prefix = "model"
supports_gradient_checkpointing = True
_no_split_modules = ["Gemma3nTextDecoderLayer"]
_skip_keys_device_placement = ["past_key_values"]
Expand Down
1 change: 0 additions & 1 deletion src/transformers/models/gemma3n/modular_gemma3n.py
Original file line number Diff line number Diff line change
Expand Up @@ -1872,7 +1872,6 @@ def forward(

class Gemma3nPreTrainedModel(Gemma2PreTrainedModel):
config: Gemma3nConfig
- base_model_prefix = ""
input_modalities = ["image", "text", "audio"]
_no_split_modules = ["Gemma3nTextDecoderLayer"]

Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/lfm2_vl/modeling_lfm2_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def pixel_unshuffle(self, hidden_states: torch.Tensor):
@auto_docstring
class Lfm2VlPreTrainedModel(PreTrainedModel):
config: Lfm2VlConfig
- base_model_prefix = ""
+ base_model_prefix = "model"
input_modalities = ["image", "text"]
supports_gradient_checkpointing = True
_skip_keys_device_placement = "past_key_values"
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/lfm2_vl/modular_lfm2_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def pixel_unshuffle(self, hidden_states: torch.Tensor):

class Lfm2VlPreTrainedModel(LlavaPreTrainedModel):
_can_compile_fullgraph = False
+ base_model_prefix = "model"


class Lfm2VlCausalLMOutputWithPast(LlavaCausalLMOutputWithPast):
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/qwen3_vl/modeling_qwen3_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -915,7 +915,7 @@ def _deepstack_process(

@auto_docstring
class Qwen3VLModel(Qwen3VLPreTrainedModel):
- base_model_prefix = ""
+ base_model_prefix = "model"
_checkpoint_conversion_mapping = {}
# Reference: fix gemma3 grad acc #37208
accepts_loss_kwargs = False
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/qwen3_vl/modular_qwen3_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,7 @@ def forward(
@auto_docstring
class Qwen3VLModel(Qwen2_5_VLModel):
config: Qwen3VLConfig
+ base_model_prefix = "model"
_checkpoint_conversion_mapping = {}
_no_split_modules = ["Qwen3VLTextDecoderLayer", "Qwen3VLVisionBlock"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,7 @@ class Qwen3VLMoeModelOutputWithPast(ModelOutput):

@auto_docstring
class Qwen3VLMoeModel(Qwen3VLMoePreTrainedModel):
- base_model_prefix = ""
+ base_model_prefix = "model"
_checkpoint_conversion_mapping = {}
# Reference: fix gemma3 grad acc #37208
accepts_loss_kwargs = False
Expand Down