Commit 53c25fe

Release: 0.17.1 changes (#2739)
* FIX Multiple issues with target_parameters (#2710)
* Bump version to 0.17.1
1 parent 48f6493 commit 53c25fe

File tree: 10 files changed, +347 −109 lines

docs/source/developer_guides/lora.md

Lines changed: 4 additions & 1 deletion

@@ -276,7 +276,10 @@ The same logic applies to `alpha_pattern`. If you're in doubt, don't try to get
 
 Generally, you should use `target_modules` to target the module (e.g. `nn.Linear`). However, in some circumstances, this is not possible. E.g., in many mixture of expert (MoE) layers in HF Transformers, instead of using `nn.Linear`, an `nn.Parameter` is used. PEFT normally overwrites the `forward` method for LoRA, but for `nn.Parameter`, there is none. Therefore, to apply LoRA to that parameter, it needs to be targeted with `target_parameters`. As an example, for [Llama4](https://huggingface.co/collections/meta-llama/llama-4-67f0c30d9fe03840bc9d0164), you can pass: `target_parameters=['feed_forward.experts.gate_up_proj', 'feed_forward.experts.down_proj']`.
 
-At the moment, this argument allows to target 2-dim or 3-dim `nn.Parameter`s. It is assumed that in the case of a 3-dim parameter, the 0th dimension is the expert dimension.
+#### Caveats
+
+- At the moment, this argument allows to target 2-dim or 3-dim `nn.Parameter`s. It is assumed that in the case of a 3-dim parameter, the 0th dimension is the expert dimension.
+- It is currently not possible to add multiple LoRA adapters (via `model.add_adapter` or `model.load_adapter`) that use `target_parameters` at the same time.
 
 ## Optimizers
 
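
For context, a minimal configuration sketch of what the caveats above refer to. The parameter paths are the Llama4 expert weights named in the docs; the checkpoint string is a placeholder and the use of AutoModelForCausalLM is an assumption, not part of this commit.

    from transformers import AutoModelForCausalLM
    from peft import LoraConfig, get_peft_model

    # MoE experts expose their weights as nn.Parameter, so they are targeted via
    # `target_parameters` rather than `target_modules`.
    base_model = AutoModelForCausalLM.from_pretrained("<llama4-checkpoint>")  # placeholder model id
    config = LoraConfig(
        target_parameters=["feed_forward.experts.gate_up_proj", "feed_forward.experts.down_proj"],
    )
    peft_model = get_peft_model(base_model, config)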

setup.py

Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@
 from setuptools import find_packages, setup
 
 
-VERSION = "0.17.0"
+VERSION = "0.17.1"
 
 extras = {}
 extras["quality"] = [

src/peft/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.17.0"
+__version__ = "0.17.1"
 
 from .auto import (
     MODEL_TYPE_TO_PEFT_MODEL_MAPPING,

src/peft/tuners/lora/layer.py

Lines changed: 24 additions & 6 deletions

@@ -2015,14 +2015,28 @@ def _remove_parametrizations(self):
                 "Something went wrong, please report this issue on PEFT: https://github.com/huggingface/peft/issues"
             )
 
-        if len(base_layer.parametrizations[parameter_name]) == 1:
+        param_list = base_layer.parametrizations[parameter_name]
+        if len(param_list) == 1:
             # last parametrization, we can safely remove it completely
             nn.utils.parametrize.remove_parametrizations(base_layer, parameter_name, leave_parametrized=False)
-        else:
-            # TODO: If there are multiple parametrizations for the same parameter_name, we currently remove all of them,
-            # which is not desired. Unfortunately, PyTorch does not support this directly, so we need to take care.
-            # For now, remove all parametrizations.
-            nn.utils.parametrize.remove_parametrizations(base_layer, parameter_name, leave_parametrized=False)
+            return
+
+        # If there are multiple parametrizations for the same parameter_name, we only want to remove the LoRA proxy.
+        # Unfortunately, PyTorch does not support this directly, so we need to take care of it manually. To achieve
+        # this, we check the ParameterList from the back until we find the _LoraParameterProxy instance and then remove
+        # it.
+        reversed_indices = reversed(range(len(param_list)))
+        for i in reversed_indices:
+            module = param_list[i]
+            if isinstance(module, _LoraParameterProxy):
+                del param_list[i]
+                break
+        else:  # no break encountered
+            # this should not happen, but raising an error is probably not necessary
+            warnings.warn(
+                f"Could not find any LoRA parametrization on {self}, please open an issue on "
+                "https://github.com/huggingface/peft/issues and report this warning."
+            )
 
     def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
         # same as lora.Linear.merge but not hard-coding base_layer.weight and without special cases like variants removed

@@ -2106,6 +2120,10 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
 
     def __repr__(self) -> str:
         rep = super().__repr__()
+        idx = rep.find("(") + 1
+        # insert the name of the parameter to allow the repr to be disambiguous when multiple parameters on the same
+        # module are being targeted
+        rep = f"{rep[:idx]}\n parameter_name='{self.parameter_name}',{rep[idx:]}"
         return "lora." + rep
 
 
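
The fix above leans on PyTorch's parametrization API: `remove_parametrizations` always strips every parametrization registered under a name, so to drop only the LoRA proxy the patch deletes the matching entry from the parametrization list directly. A standalone sketch of that mechanism (plain PyTorch, not PEFT code; the two toy parametrizations are made up for illustration):

    import torch
    from torch import nn
    from torch.nn.utils import parametrize

    class AddOne(nn.Module):
        def forward(self, W):
            return W + 1.0

    class TimesTwo(nn.Module):
        def forward(self, W):
            return W * 2.0

    lin = nn.Linear(2, 2)
    parametrize.register_parametrization(lin, "weight", AddOne())
    parametrize.register_parametrization(lin, "weight", TimesTwo())

    param_list = lin.parametrizations["weight"]
    print(len(param_list))  # 2

    # remove_parametrizations(lin, "weight") would drop *both* entries; deleting
    # from the list (as the patch does) removes only the chosen parametrization.
    del param_list[-1]
    print(len(lin.parametrizations["weight"]))  # 1
    print(lin.weight)  # only AddOne is applied to the original weight now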

src/peft/tuners/lora/model.py

Lines changed: 12 additions & 0 deletions

@@ -185,6 +185,18 @@ def _create_and_replace(
         if current_key is None:
             raise ValueError("Current Key shouldn't be `None`")
 
+        if lora_config.target_parameters:
+            # Right now, unfortunately, we don't support multiple adapters with target_parameters on the same model.
+            other_configs_use_target_params = any(
+                conf.target_parameters for key, conf in self.peft_config.items() if key != adapter_name
+            )
+            if other_configs_use_target_params:
+                raise ValueError(
+                    f"Adding a LoRA config with `target_parameters={lora_config.target_parameters}` but there are "
+                    "already other LoRA adapters on this model that use `target_parameters`. At the moment, only "
+                    "one LoRA adapter per model with `target_parameters` is allowed."
+                )
+
         # Regexp matching - Find key which matches current target_name in patterns provided
         r_key = get_pattern_key(lora_config.rank_pattern.keys(), current_key)
         alpha_key = get_pattern_key(lora_config.alpha_pattern.keys(), current_key)
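
To illustrate when the new check fires, a usage sketch mirroring the test added in tests/test_initialization.py further down (the toy model and the adapter name "other" are illustrative):

    from torch import nn
    from peft import LoraConfig, get_peft_model

    class MyModule(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(10, 10)

    config = LoraConfig(target_modules=[], target_parameters=["linear.weight"])
    model = get_peft_model(MyModule(), config)

    # A second adapter that also uses `target_parameters` now hits the check above:
    # ValueError: ... only one LoRA adapter per model with `target_parameters` is allowed.
    model.add_adapter(adapter_name="other", peft_config=config)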

src/peft/tuners/tuners_utils.py

Lines changed: 68 additions & 34 deletions

@@ -722,43 +722,77 @@ def inject_adapter(
     def _inject_parameters(
         self, peft_config: PeftConfig, model: nn.Module, adapter_name: str, low_cpu_mem_usage: bool
     ) -> None:
-        # TODO very simple matching, might not cover all use cases
-        target_names = set(peft_config.target_parameters)
-        for module_name, module in model.named_modules():
-            for param_name, param in module.named_parameters(recurse=False):
-                # It is possible that the layer is already a PEFT layer and needs updating with a new adapter. In this
-                # case, the name of parameter would be something like `model.layers.0.experts.base_layer.weight`, i.e.
-                # there is a "base_layer" inserted in the name. We need to remove that, otherwise we won't be able to
-                # match correctly (in this case, "experts.weight" would not match).
-                prefix, _, suffix = module_name.rpartition(".base_layer")
+        """Inject layers based on peft_config.target_modules"""
+
+        def strip_base_layer_from_name(module_name):
+            # It is possible that the layer is already a PEFT layer and needs updating with a new adapter. In this case,
+            # the name of parameter would be something like `model.layers.0.experts.base_layer.weight`, i.e. there is a
+            # "base_layer" inserted in the name. We need to remove that, otherwise we won't be able to match correctly
+            # (in this case, "experts.weight" would not match).
+            name = ".base_layer"
+            while name in module_name:
+                prefix, _, suffix = module_name.rpartition(name)
                 module_name = prefix + suffix
-                key = f"{module_name}.{param_name}"
-                # we're interested in finding the "lowest" module that contains the parameter, hence recurse=False
-                if (key in target_names) or any(key.endswith(f".{target_key}") for target_key in target_names):
-                    self.targeted_parameter_names.append(key)
+            return module_name
+
+        def create_and_replace_param(module_name, key, param_name):
+            # helper function to avoid duplication
+            parent, target, target_name = _get_submodules(model, module_name)
+            unwrapped_module_name = strip_base_layer_from_name(module_name)
+            unwrapped_module = model.get_submodule(unwrapped_module_name)
+            # use the class name for checking to avoid circular import
+            if isinstance(unwrapped_module, BaseTunerLayer) and unwrapped_module.__class__.__name__ != "ParamWrapper":
+                raise ValueError(
+                    f"Trying to wrap an `nn.Parameter` of layer '{unwrapped_module_name}' of type "
+                    f"{type(target).__name__}, which is not a valid target. Make sure that this layer is not "
+                    "also targeted with `target_modules`. For some models, PEFT will do this automatically, "
+                    "try setting `target_modules=[]` to prevent it."
+                )
 
-                    parent, target, target_name = _get_submodules(model, module_name)
-                    # use the class name for checking to avoid circular import
-                    if isinstance(target, BaseTunerLayer) and target.__class__.__name__ != "ParamWrapper":
-                        raise ValueError(
-                            f"Trying to wrap an `nn.Parameter` of layer '{target_name}' of type "
-                            f"{type(target).__name__}, which is not a valid target. Make sure that this layer is not "
-                            "also targeted with `target_modules`. For some models, PEFT will do this automatically, "
-                            "try setting `target_modules=[]` to prevent it."
-                        )
+            self._check_target_module_compatiblity(peft_config, model, target_name)
+            ctx = init_empty_weights if low_cpu_mem_usage else nullcontext
+            with ctx():
+                self._create_and_replace(
+                    peft_config,
+                    adapter_name,
+                    target,
+                    target_name,
+                    parent,
+                    current_key=key,
+                    parameter_name=param_name.rpartition(".")[-1],
+                )
 
-                    self._check_target_module_compatiblity(peft_config, model, target_name)
-                    ctx = init_empty_weights if low_cpu_mem_usage else nullcontext
-                    with ctx():
-                        self._create_and_replace(
-                            peft_config,
-                            adapter_name,
-                            target,
-                            target_name,
-                            parent,
-                            current_key=key,
-                            parameter_name=param_name.rpartition(".")[-1],
-                        )
+        # TODO very simple matching, might not cover all use cases
+        unsorted_target_names = set(peft_config.target_parameters)
+        # As the order of matching can influence the nesting of multiple params on the same module, ensure determinism
+        # by sorting.
+        target_names = sorted(unsorted_target_names)
+        for module_name, module in model.named_modules():
+            if hasattr(module, "parametrizations"):
+                # Deal with the case that the parameter is already parametrized. The issue is that we would not be able
+                # to match `f"{module_name}.{param_name}"`, as the parameter is now something like
+                # `module.parametrization.weight`.
+                for key in target_names:
+                    target_module_name, _, param_name = key.rpartition(".")
+                    if target_module_name != module_name:
+                        continue
+                    if getattr(module, param_name, None) is None:
+                        continue
+                    create_and_replace_param(module_name, key, param_name)
+                    self.targeted_parameter_names.append(key)
+            else:
+                # Standard case: the parameter is not already parametrized. Note, however, that the model could already
+                # be nested with lora.ParamWrapper, as this is how we allow targeting multiple Parameters on the same
+                # module.
+                unwrapped_module_name = strip_base_layer_from_name(module_name)
+                # we're interested in finding the "lowest" module that contains the parameter, hence recurse=False
+                for param_name, param in module.named_parameters(recurse=False):
+                    key = f"{unwrapped_module_name}.{param_name}"
+                    if (key in target_names) or any(key.endswith(f".{target_key}") for target_key in target_names):
+                        # Note: We use the unwrapped_module_name to check if the key matches, but we use the module_name for
+                        # replacement, since we want to replace the wrapped module.
+                        create_and_replace_param(module_name, key, param_name)
+                        self.targeted_parameter_names.append(key)
 
     def merge_adapter(self, adapter_names: Optional[list[str]] = None, safe_merge: bool = False) -> None:
         """

tests/test_custom_models.py

Lines changed: 30 additions & 0 deletions

@@ -936,6 +936,11 @@
 }
 
 
+def _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs):
+    if (config_cls == LoraConfig) and config_kwargs.get("target_parameters"):
+        pytest.skip("LoRA with multiple adapters with target_parameters is not supported")
+
+
 class MLP(nn.Module):
     def __init__(self, bias=True):
         super().__init__()

@@ -1389,6 +1394,7 @@ def test_save_pretrained_pickle(self, test_name, model_id, config_cls, config_kw
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_load_model_low_cpu_mem_usage(self, test_name, model_id, config_cls, config_kwargs):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs)
         self._test_load_model_low_cpu_mem_usage(model_id, config_cls, config_kwargs)
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)

@@ -1397,6 +1403,7 @@ def test_from_pretrained_config_construction(self, test_name, model_id, config_c
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_load_multiple_adapters(self, test_name, model_id, config_cls, config_kwargs):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs)
         self._test_load_multiple_adapters(model_id, config_cls, config_kwargs)
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)

@@ -1995,6 +2002,8 @@ def run_with_disable(config_kwargs, bias):
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_active_adapter(self, test_name, model_id, config_cls, config_kwargs):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs)
+
         model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
         config = config_cls(
             base_model_name_or_path=model_id,

@@ -2085,10 +2094,12 @@ def test_disable_adapters_exiting_context_irregular_state(self, test_name, model
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_delete_adapter(self, test_name, model_id, config_cls, config_kwargs):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs)
         self._test_delete_adapter(model_id, config_cls, config_kwargs)
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_delete_inactive_adapter(self, test_name, model_id, config_cls, config_kwargs):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs)
         self._test_delete_inactive_adapter(model_id, config_cls, config_kwargs)
 
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)

@@ -2786,6 +2797,19 @@ def test_repr_lora_conv2d(self):
         assert "lora_B" in print_output
         assert "default" in print_output
 
+    def test_repr_lora_paramwrapper(self):
+        config = LoraConfig(target_parameters=["lin0.weight"])
+        model = get_peft_model(MLP(), config)
+        print_output = repr(model.model.lin0)
+        assert print_output.startswith("lora.ParamWrapper")
+        # important: targeted parameter should be contained:
+        assert "parameter_name='weight'" in print_output
+        assert "in_features=10" in print_output
+        assert "out_features=20" in print_output
+        assert "lora_A" in print_output
+        assert "lora_B" in print_output
+        assert "default" in print_output
+
 
 class TestMultipleActiveAdapters:
     """

@@ -2820,6 +2844,8 @@ def resolve_model_cls(self, tuner_method):
     def test_multiple_active_adapters_forward(
         self, test_name, tuner_method, config_cls, config_kwargs_1, config_kwargs_2
     ):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs_2)
+
         torch.manual_seed(0)
 
         model = self.resolve_model_cls(tuner_method)

@@ -2878,6 +2904,8 @@ def test_multiple_active_adapters_forward(
     def test_multiple_active_adapters_merge_and_unmerge(
         self, test_name, tuner_method, config_cls, config_kwargs_1, config_kwargs_2
     ):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs_2)
+
         torch.manual_seed(0)
 
         model = self.resolve_model_cls(tuner_method)

@@ -2911,6 +2939,8 @@ def test_multiple_active_adapters_merge_and_unmerge(
         "test_name, tuner_method, config_cls, config_kwargs_1, config_kwargs_2", MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES
     )
     def test_merge_layers_multi(self, test_name, tuner_method, config_cls, config_kwargs_1, config_kwargs_2):
+        _skip_tests_with_multiple_adapters_with_target_parameters(config_cls, config_kwargs_2)
+
         torch.manual_seed(0)
 
         model = self.resolve_model_cls(tuner_method)

tests/test_initialization.py

Lines changed: 21 additions & 1 deletion

@@ -1406,7 +1406,7 @@ def __init__(self):
                 self.linear = nn.Linear(10, 10)
 
         base_model = MyModule()
-        config = LoraConfig(target_modules=["linear"], target_parameters=["weight"])
+        config = LoraConfig(target_modules=["linear"], target_parameters=["linear.weight"])
         msg = "Trying to wrap an `nn.Parameter` of layer 'linear' of type Linear, which is not a valid target."
         with pytest.raises(ValueError, match=msg):
             get_peft_model(base_model, config)

@@ -1445,6 +1445,26 @@ def test_valid_target_modules_invalid_target_parameters_warns(self):
         with pytest.warns(RuntimeWarning, match=msg):
             get_peft_model(model, config)
 
+    def test_adding_multiple_adapters_with_target_parameters_raises(self):
+        model = self.get_model()
+        config = LoraConfig(target_modules=[], target_parameters=["linear.weight"])
+        model = get_peft_model(model, config)
+        msg = re.escape("only one LoRA adapter per model with `target_parameters` is allowed")
+        with pytest.raises(ValueError, match=msg):
+            model.add_adapter(adapter_name="other", peft_config=config)
+
+    def test_loading_loading_adapters_with_target_parameters_raises(self, tmp_path):
+        model = self.get_model()
+        config = LoraConfig(target_modules=[], target_parameters=["linear.weight"])
+        model = get_peft_model(model, config)
+        model.save_pretrained(tmp_path)
+
+        model = self.get_model()
+        model = PeftModel.from_pretrained(model, tmp_path)
+        msg = re.escape("only one LoRA adapter per model with `target_parameters` is allowed")
+        with pytest.raises(ValueError, match=msg):
+            model.load_adapter(tmp_path, adapter_name="other")
+
 
 class TestLokrInitialization:
     torch_device = infer_device()
