
[Question] Does Unsloth support GRPO finetuning with vision models? #2931

@Sweaterdog

Description

I am attempting to run GRPO on a model that I primed with reasoning data using PPO. To preserve that reasoning ability, I now want to fine-tune the model on non-reasoning examples with GRPO to make it smarter. However, when I run the notebook with FastModel or FastVisionModel on a Qwen 2.5 VL model, I get the following error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[9], line 5
      2 import torch
      3 max_seq_length = 16000 # Can increase for longer reasoning traces
----> 5 model, tokenizer = FastModel.from_pretrained(
      6     model_name = "./charles_outputs/checkpoint-25000",
      7     max_seq_length = max_seq_length,
      8     load_in_4bit = True, # False for LoRA 16bit
      9     fast_inference = True, # Enable vLLM fast inference
     10     gpu_memory_utilization = 0.8, # Reduce if out of memory
     11 )

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/unsloth/models/loader.py:792, in FastModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, return_logits, fullgraph, use_exact_model_name, auto_model, whisper_language, whisper_task, unsloth_force_compile, *args, **kwargs)
    789 if auto_model is None:
    790     auto_model = AutoModelForVision2Seq if is_vlm else AutoModelForCausalLM
--> 792 model, tokenizer = FastBaseModel.from_pretrained(
    793     model_name        = model_name,
    794     max_seq_length    = max_seq_length,
    795     dtype             = _get_dtype(dtype),
    796     load_in_4bit      = load_in_4bit,
    797     load_in_8bit      = load_in_8bit,
    798     full_finetuning   = full_finetuning,
    799     token             = token,
    800     device_map        = device_map,
    801     trust_remote_code = trust_remote_code,
    802     revision          = revision if not is_peft else None,
    803     model_types       = model_types,
    804     tokenizer_name    = tokenizer_name,
    805     auto_model        = auto_model,
    806     use_gradient_checkpointing = use_gradient_checkpointing,
    807     supports_sdpa     = supports_sdpa,
    808     whisper_language  = whisper_language,
    809     whisper_task      = whisper_task,            
    810     *args, **kwargs,
    811 )
    813 if resize_model_vocab is not None:
    814     model.resize_token_embeddings(resize_model_vocab)

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/unsloth/models/vision.py:423, in FastBaseModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, trust_remote_code, model_types, tokenizer_name, auto_model, use_gradient_checkpointing, supports_sdpa, whisper_language, whisper_task, **kwargs)
    420 torch_dtype = dtype
    421 if do_forced_float32: torch_dtype = torch.bfloat16
--> 423 model = auto_model.from_pretrained(
    424     model_name,
    425     device_map              = device_map,
    426     torch_dtype             = torch_dtype,
    427     # quantization_config   = bnb_config,
    428     token                   = token,
    429     trust_remote_code       = trust_remote_code,
    430     # attn_implementation   = attn_implementation,
    431     **kwargs,
    432 )
    433 # Return old flag
    434 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py:600, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    598     if model_class.config_class == config.sub_configs.get("text_config", None):
    599         config = config.get_text_config()
--> 600     return model_class.from_pretrained(
    601         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    602     )
    603 raise ValueError(
    604     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    605     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    606 )

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:311, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
    309 old_dtype = torch.get_default_dtype()
    310 try:
--> 311     return func(*args, **kwargs)
    312 finally:
    313     torch.set_default_dtype(old_dtype)

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:4766, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
   4758     config = cls._autoset_attn_implementation(
   4759         config,
   4760         torch_dtype=torch_dtype,
   4761         device_map=device_map,
   4762     )
   4764 with ContextManagers(model_init_context):
   4765     # Let's make sure we don't run the init function of buffer modules
-> 4766     model = cls(config, *model_args, **model_kwargs)
   4768 # Make sure to tie the weights correctly
   4769 model.tie_weights()

TypeError: Qwen2_5_VLForConditionalGeneration.__init__() got an unexpected keyword argument 'fast_inference'
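
From the traceback it looks like the fast_inference flag is being forwarded straight into Qwen2_5_VLForConditionalGeneration.__init__ via **kwargs. Presumably (untested on the vision path, this is only what the traceback suggests) dropping the vLLM-specific arguments would avoid the TypeError:

import torch
from unsloth import FastModel

max_seq_length = 16000 # Can increase for longer reasoning traces

# Same checkpoint as above, but without fast_inference / gpu_memory_utilization,
# since those appear to be passed through to the HF model constructor.
model, tokenizer = FastModel.from_pretrained(
    model_name = "./charles_outputs/checkpoint-25000",
    max_seq_length = max_seq_length,
    load_in_4bit = True,
)

I would still prefer to keep fast_inference for the GRPO generations, though, since that is why I enabled vLLM in the first place.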

If I instead run it with the default FastLanguageModel, loading goes through fine and I get no error. The bigger problem is that the model I am loading already has LoRA adapters: do I need to merge it to 16-bit, reload the merged 16-bit model in 4-bit, and then apply new adapters? I suspect that is the issue, because I get this error when I try to run the training process (the workflow I had in mind is sketched after the traceback below).

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[6], line 1
----> 1 trainer = GRPOTrainer(
      2     model = model,
      3     processing_class = tokenizer,
      4     reward_funcs = [
      5         int_reward_func,
      6         correctness_reward_func,
      7         think_format_reward_func,
      8         thinkcount_reward_func,
      9     ],
     10     args = training_args,
     11     train_dataset = dataset,
     12 )
     13 trainer.train()

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/unsloth/trainer.py:209, in _backwards_compatible_trainer.<locals>.new_init(self, *args, **kwargs)
    207     kwargs["args"] = config
    208 pass
--> 209 original_init(self, *args, **kwargs)

File ~/Desktop/Coding_Projects/Unsloth/unsloth_compiled_cache/UnslothGRPOTrainer.py:2482, in UnslothGRPOTrainer.__init__(self, model, reward_funcs, args, train_dataset, eval_dataset, processing_class, reward_processing_classes, callbacks, peft_config, **kwargs)
   2479 from unsloth_zoo.logging_utils import PatchRLStatistics
   2480 PatchRLStatistics('grpo_trainer', other_metrics)
-> 2482 super().__init__(
   2483     model = model,
   2484     reward_funcs = reward_funcs,
   2485     args = args,
   2486     train_dataset = train_dataset,
   2487     eval_dataset = eval_dataset,
   2488     processing_class = processing_class,
   2489     reward_processing_classes = reward_processing_classes,
   2490     callbacks = callbacks,
   2491     peft_config = peft_config,**kwargs)
   2492 if hasattr(self, 'neftune_hook_handle'):
   2493     self.neftune_hook_handle.remove()

File ~/Desktop/Coding_Projects/Unsloth/unsloth_compiled_cache/UnslothGRPOTrainer.py:1232, in _UnslothGRPOTrainer.__init__(self, model, reward_funcs, args, train_dataset, eval_dataset, processing_class, reward_processing_classes, callbacks, optimizers, peft_config)
   1224 # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the
   1225 # input tensor associated with the key "input_ids". However, in GRPO, the sampled data does not include the
   1226 # "input_ids" key. Instead, the available keys is "prompt". As a result, the trainer issues the warning:
   1227 # "Could not estimate the number of tokens of the input, floating-point operations will not be computed." To
   1228 # suppress this warning, we set the "estimate_tokens" key in the model's "warnings_issued" dictionary to True.
   1229 # This acts as a flag to indicate that the warning has already been issued.
   1230 model.warnings_issued["estimate_tokens"] = True
-> 1232 super().__init__(
   1233     model=model,
   1234     args=args,
   1235     data_collator=identity,  # No data collation is needed in GRPO
   1236     train_dataset=train_dataset,
   1237     eval_dataset=eval_dataset,
   1238     processing_class=processing_class,
   1239     callbacks=callbacks,
   1240     optimizers=optimizers,
   1241 )
   1243 # Reference model
   1244 self.beta = args.beta

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/transformers/utils/deprecation.py:172, in deprecate_kwarg.<locals>.wrapper.<locals>.wrapped_func(*args, **kwargs)
    168 elif minimum_action in (Action.NOTIFY, Action.NOTIFY_ALWAYS) and not is_torchdynamo_compiling():
    169     # DeprecationWarning is ignored by default, so we use FutureWarning instead
    170     warnings.warn(message, FutureWarning, stacklevel=2)
--> 172 return func(*args, **kwargs)

File ~/Desktop/Coding_Projects/Unsloth/.venv/lib/python3.12/site-packages/transformers/trainer.py:565, in Trainer.__init__(self, model, args, data_collator, train_dataset, eval_dataset, processing_class, model_init, compute_loss_func, compute_metrics, callbacks, optimizers, optimizer_cls_and_kwargs, preprocess_logits_for_metrics)
    563 # At this stage the model is already loaded
    564 if _is_quantized_and_base_model and not _is_peft_model(model) and not _is_model_quantized_and_qat_trainable:
--> 565     raise ValueError(
    566         "You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of"
    567         " the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft"
    568         " for more details"
    569     )
    570 elif _is_quantized_and_base_model and not _quantization_method_supports_training:
    571     raise ValueError(
    572         f"The model you are trying to fine-tune is quantized with {model.hf_quantizer.quantization_config.quant_method}"
    573         " but that quantization method do not support training. Please open an issue on GitHub: https://github.com/huggingface/transformers"
    574         f" to request the support for training support for {model.hf_quantizer.quantization_config.quant_method}"
    575     )

ValueError: You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more details
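
For reference, the workflow I had in mind for the adapter question above is roughly the following. This is only a sketch: the merged directory name and the LoRA hyperparameters are placeholders, and I have not confirmed that this is the intended approach.

from unsloth import FastVisionModel

# 1) Load the existing LoRA checkpoint in 16-bit and merge the adapters
#    into the base weights (assuming save_pretrained_merged does the merge).
model, tokenizer = FastVisionModel.from_pretrained(
    model_name = "./charles_outputs/checkpoint-25000",
    load_in_4bit = False,
)
model.save_pretrained_merged("charles_merged_16bit", tokenizer)

# 2) Reload the merged model in 4-bit and attach fresh, trainable adapters,
#    which should satisfy the "attach trainable adapters" requirement above.
model, tokenizer = FastVisionModel.from_pretrained(
    model_name = "charles_merged_16bit",
    load_in_4bit = True,
)
model = FastVisionModel.get_peft_model(
    model,
    r = 16,          # placeholder rank
    lora_alpha = 16, # placeholder alpha
)

# 3) Only then construct the GRPOTrainer as in the cell above.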

So, is it possible to do GRPO fine-tuning with vision models in Unsloth, or is that a feature that will be added later?
