src/peft/peft_model.py (21 additions, 16 deletions)

@@ -262,24 +262,29 @@ def save_mutated_as_lora(peft_config, path_initial_model_for_weight_conversion,
                 str(peft_config.init_lora_weights).lower().startswith(prefix) for prefix in ["pissa", "olora", "true"]
             ):
                 warnings.warn(
-                    "`path_initial_model_for_weight_conversion` only works for converting a PiSSA or OLoRA adapter to a LoRA adapter"
+                    "`path_initial_model_for_weight_conversion` only works for converting a PiSSA or OLoRA adapter to "
+                    "a LoRA adapter"
                 )
-            initial_adapter = os.path.basename(path_initial_model_for_weight_conversion)
-            self.load_adapter(
-                os.path.dirname(path_initial_model_for_weight_conversion),
-                subfolder=initial_adapter,
-                adapter_name=initial_adapter,
-            )
-            if any(
-                str(self.peft_config[initial_adapter].init_lora_weights).lower().startswith(prefix)
-                for prefix in ["pissa", "olora"]
-            ):
-                raise ValueError(
-                    "The `init_lora_weights` parameter of the initial adapter should be set to `True`. "
-                    "Otherwise, `self.load_adapter` will subtract the decomposed values again based on the residual model."
+            initial_adapter_name = os.path.basename(path_initial_model_for_weight_conversion)
Comment on the line above:

Member:
Just for sanity: is it 100 percent okay to assume that always deleting the initial adapter, instead of letting the user choose, won't work against anyone?

Member (author):
Yes, there is pretty much no reason why users would want to keep the initial model loaded and delete the trained model. I pondered keeping both, but users could be surprised by the extra memory required; for instance, evals run after training and saving the model could then OOM.

The new behavior is documented in the release notes: https://github.com/huggingface/peft/releases/tag/untagged-f3a302cd5b4f9d08a46b

(Release planned later today)

+            try:
+                self.load_adapter(
+                    os.path.dirname(path_initial_model_for_weight_conversion),
+                    subfolder=initial_adapter_name,
+                    adapter_name=initial_adapter_name,
+                )
+                is_pissa = str(self.peft_config[initial_adapter_name].init_lora_weights).lower().startswith("pissa")
+                is_olora = str(self.peft_config[initial_adapter_name].init_lora_weights).lower() == "olora"
+                if is_pissa or is_olora:
+                    raise ValueError(
+                        "The `init_lora_weights` parameter of the initial adapter should be set to `True`. "
+                        "Otherwise, `self.load_adapter` will subtract the decomposed values again based on the "
+                        "residual model."
+                    )
+                output_state_dict = self.base_model.subtract_mutated_init(
+                    output_state_dict, initial_adapter_name, kwargs
+                )
-            output_state_dict = self.base_model.subtract_mutated_init(output_state_dict, initial_adapter, kwargs)
-            self.delete_adapter(adapter_name)
+            finally:
+                self.delete_adapter(initial_adapter_name)
             return output_state_dict

         if is_main_process:
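To make the workflow touched by this diff concrete, here is a minimal sketch of the PiSSA-to-LoRA conversion discussed in the review thread above. The toy `MLP`, directory names, and hyperparameters are illustrative assumptions, not taken from the PR; the calls themselves follow PEFT's documented PiSSA conversion flow.

```python
import copy
import os
import tempfile

from torch import nn
from peft import LoraConfig, get_peft_model


class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(16, 16)
        self.out = nn.Linear(16, 2)

    def forward(self, x):
        return self.out(self.linear(x))


tmp_dir = tempfile.mkdtemp()
init_dir = os.path.join(tmp_dir, "init-model")
converted_dir = os.path.join(tmp_dir, "pissa-model-converted")

base = MLP()  # keep pristine base weights around for loading the converted adapter later
config = LoraConfig(init_lora_weights="pissa", target_modules=["linear"], r=8)
peft_model = get_peft_model(copy.deepcopy(base), config)

# Save the initial (untrained) PiSSA adapter. It is stored with `init_lora_weights=True`
# so that loading it later does not decompose the base weights a second time; that is
# exactly the situation the new ValueError above guards against.
peft_model.peft_config["default"].init_lora_weights = True
peft_model.save_pretrained(init_dir)
peft_model.peft_config["default"].init_lora_weights = "pissa"

# ... training of the "default" adapter would happen here ...

# Saving with `path_initial_model_for_weight_conversion` converts the mutated PiSSA
# adapter into a plain LoRA checkpoint. With this PR, the initial adapter is only loaded
# temporarily and is deleted in the `finally` block, so the trained adapter stays loaded.
peft_model.save_pretrained(converted_dir, path_initial_model_for_weight_conversion=init_dir)
print(list(peft_model.peft_config.keys()))  # ["default"]
```

If the initial adapter had instead been saved with `init_lora_weights="pissa"`, the new check in the hunk above would raise the `ValueError`, since loading it would subtract the decomposed values a second time.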
tests/test_initialization.py (8 additions, 0 deletions)

@@ -323,9 +323,13 @@ def test_lora_pissa_conversion_same_output_after_loading(self, data, tmp_path):
         )

         # save the model with conversion
+        peft_config_keys_before = list(peft_model.peft_config.keys())
         peft_model.save_pretrained(
             tmp_path / "pissa-model-converted", path_initial_model_for_weight_conversion=tmp_path / "init-model"
         )
+        peft_config_keys_after = list(peft_model.peft_config.keys())
+        assert peft_config_keys_before == peft_config_keys_after

Comment on lines +326 to +332

Member:
For my understanding: why would this hold, given that we're deleting an adapter?

Member (author):
To do the weight conversion, we have to temporarily load the initial adapter (i.e. the adapter from before training). Before this PR, we would keep that initial adapter around but delete the actually trained adapter. This is surprising for users, who would rather keep the trained adapter around, e.g. to perform evals (see the linked issue #1860). With this PR, we instead keep the trained adapter and delete the initial adapter, which was only really needed for the conversion step.

As such, I added the assert that the loaded adapters are the same before and after saving, which also holds when saving any other kind of model.

         model_converted = PeftModel.from_pretrained(deepcopy(model), tmp_path / "pissa-model-converted")
         output_converted = model_converted(data)[0]

@@ -408,9 +412,13 @@ def test_olora_conversion_same_output_after_loading(self, data, tmp_path):
         )

         # save the model with conversion
+        peft_config_keys_before = list(peft_model.peft_config.keys())
         peft_model.save_pretrained(
             tmp_path / "olora-model-converted", path_initial_model_for_weight_conversion=tmp_path / "init-model"
         )
+        peft_config_keys_after = list(peft_model.peft_config.keys())
+        assert peft_config_keys_before == peft_config_keys_after

         model_converted = PeftModel.from_pretrained(deepcopy(model), tmp_path / "olora-model-converted")
         output_converted = model_converted(data)[0]

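Continuing the sketch shown after the first file's diff (same toy `MLP`, `base`, and temporary directories, all illustrative), this is roughly what the new assertions verify: the conversion-save leaves the set of loaded adapters unchanged, and the converted checkpoint behaves like a plain LoRA adapter.

```python
import copy

from peft import PeftModel

# The temporarily loaded initial adapter was deleted in the `finally` block, so only the
# trained adapter remains after the conversion-save; before this PR it was the other way
# around, and the trained adapter would have been lost.
assert list(peft_model.peft_config.keys()) == ["default"]

# The converted checkpoint needs no PiSSA-specific setup: it loads onto a copy of the
# original base weights like any other LoRA adapter.
model_converted = PeftModel.from_pretrained(copy.deepcopy(base), converted_dir)
print(list(model_converted.peft_config.keys()))  # ["default"]
```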