
Commit ab33c25

jeejeelee authored and Alvant committed

[Bugfix] Fix MiniCPM's LoRA bug (vllm-project#9286)
Signed-off-by: Alvant <alvasian@yandex.ru>
1 parent 530b840 commit ab33c25

File tree

3 files changed: +40 -17 lines changed


vllm/lora/models.py

Lines changed: 5 additions & 1 deletion

@@ -337,7 +337,11 @@ def __init__(
         self.packed_modules_mapping = copy.deepcopy(
             self.model.packed_modules_mapping)
         # Used to indicate whether the model is a multimodal model
-        self.supports_mm: bool = supports_multimodal(self.model)
+        self.supports_mm: bool = (
+            supports_multimodal(self.model)
+            # In case the model only supports LoRA for
+            # text modules (e.g. ChatGLM)
+            and hasattr(self.model, "get_mm_mapping"))
         self.packed_modules: Dict[str, List[str]] = {}
         self.modules: Dict[str, "BaseLayerWithLoRA"] = {}
         # Dict instead of a Set for compatibility with LRUCache.
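
The guard added above means a model is treated as multimodal for LoRA purposes only if it also exposes get_mm_mapping; models such as ChatGLM report multimodal support but only accept LoRA on their text modules. Below is a minimal sketch of the same boolean condition in isolation, using hypothetical stand-in classes rather than real vLLM models or the real supports_multimodal() helper:

# Hypothetical stand-ins for illustration only; vLLM's supports_multimodal()
# inspects the actual model class, this sketch just mimics the boolean logic.
class TextOnlyLoRAModel:
    """Multimodal model whose LoRA support covers text modules only."""


class FullMultimodalLoRAModel:
    """Multimodal model that also provides a module mapping for LoRA."""

    def get_mm_mapping(self):
        # Names which submodules belong to the multimodal towers (made up here).
        return {"connector": "resampler", "tower_model": "vpm"}


def supports_mm_lora(model: object) -> bool:
    # Mirrors the patched condition: multimodal support alone is not enough,
    # the model must also expose get_mm_mapping().
    is_multimodal = True  # assume supports_multimodal(model) returned True
    return is_multimodal and hasattr(model, "get_mm_mapping")


assert supports_mm_lora(FullMultimodalLoRAModel())
assert not supports_mm_lora(TextOnlyLoRAModel())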

vllm/model_executor/models/minicpm.py

Lines changed: 13 additions & 16 deletions

@@ -474,17 +474,18 @@ def __init__(
         unpadded_vocab_size = config.vocab_size
         if lora_config:
             unpadded_vocab_size += lora_config.lora_extra_vocab_size
-        if not self.config.tie_word_embeddings:
-            self.lm_head = ParallelLMHead(
-                unpadded_vocab_size,
-                config.hidden_size,
-                org_num_embeddings=config.vocab_size,
-                padding_size=DEFAULT_VOCAB_PADDING_SIZE
-                # We need bigger padding if using lora for kernel
-                # compatibility
-                if not lora_config else lora_config.lora_vocab_padding_size,
-                quant_config=quant_config,
-            )
+        self.lm_head = ParallelLMHead(
+            unpadded_vocab_size,
+            config.hidden_size,
+            org_num_embeddings=config.vocab_size,
+            padding_size=DEFAULT_VOCAB_PADDING_SIZE
+            # We need bigger padding if using lora for kernel
+            # compatibility
+            if not lora_config else lora_config.lora_vocab_padding_size,
+            quant_config=quant_config,
+        )
+        if config.tie_word_embeddings:
+            self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens)
         self.scale_width = self.config.hidden_size / self.config.dim_model_base

         self.logits_processor = LogitsProcessor(unpadded_vocab_size,
@@ -517,11 +518,7 @@ def compute_logits(
         sampling_metadata: SamplingMetadata,
     ) -> Optional[torch.Tensor]:
         hidden_states = hidden_states / self.scale_width
-        if self.config.tie_word_embeddings:
-            lm_head = self.model.embed_tokens
-        else:
-            lm_head = self.lm_head
-        logits = self.logits_processor(lm_head, hidden_states,
+        logits = self.logits_processor(self.lm_head, hidden_states,
                                        sampling_metadata)
         return logits
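
The net effect of this hunk is that lm_head is always constructed and, when config.tie_word_embeddings is set, its weight is tied to embed_tokens, so compute_logits can pass self.lm_head unconditionally and the LoRA machinery has a real head module to attach to. A rough plain-PyTorch sketch of the same tying pattern, with toy sizes and not vLLM's ParallelLMHead:

import torch
import torch.nn as nn

vocab_size, hidden_size = 128, 16
embed_tokens = nn.Embedding(vocab_size, hidden_size)
lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

tie_word_embeddings = True
if tie_word_embeddings:
    # Share one parameter tensor between head and embedding, analogous to
    # self.lm_head.tie_weights(self.model.embed_tokens) in the diff above.
    lm_head.weight = embed_tokens.weight

# compute_logits-style call now always goes through lm_head.
hidden_states = torch.randn(2, hidden_size)
logits = lm_head(hidden_states)
assert logits.shape == (2, vocab_size)
assert lm_head.weight.data_ptr() == embed_tokens.weight.data_ptr()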

vllm/model_executor/models/minicpm3.py

Lines changed: 22 additions & 0 deletions

@@ -216,6 +216,28 @@ def _init_layers(


 class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
+    packed_modules_mapping = {
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # LoRA specific attributes
+    supported_lora_modules = [
+        "kv_a_proj_with_mqa",
+        "q_a_proj",
+        "q_b_proj",
+        "kv_b_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+        "embed_tokens",
+        "lm_head",
+    ]
+
+    # `embedding_modules` and `embedding_padding_modules`
+    # are inherited from MiniCPMForCausalLM

     def _init_model(self):
         self.model = MiniCPM3Model(config=self.config,
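
These new class attributes are declarative: supported_lora_modules lists which submodules may carry adapters, and packed_modules_mapping tells the LoRA manager how per-projection weights in a checkpoint (e.g. gate_proj, up_proj) map onto vLLM's fused gate_up_proj module. A small sketch of how such a mapping can be consumed; expand_packed_module is a hypothetical helper for illustration, not vLLM's actual loader API:

from typing import Dict, List

packed_modules_mapping: Dict[str, List[str]] = {
    "gate_up_proj": ["gate_proj", "up_proj"],
}

def expand_packed_module(module_name: str) -> List[str]:
    # Fused vLLM module -> original per-projection names used in a
    # LoRA checkpoint; unpacked modules map to themselves.
    return packed_modules_mapping.get(module_name, [module_name])

print(expand_packed_module("gate_up_proj"))  # ['gate_proj', 'up_proj']
print(expand_packed_module("o_proj"))        # ['o_proj']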
