Changes from 7 commits
2 changes: 2 additions & 0 deletions docs/source/en/_toctree.yml
@@ -600,6 +600,8 @@
title: MegatronGPT2
- local: model_doc/minimax
title: MiniMax
- local: model_doc/minimax_m2
title: MiniMax-M2
- local: model_doc/ministral
title: Ministral
- local: model_doc/mistral
2 changes: 2 additions & 0 deletions docs/source/en/model_doc/minimax.md
@@ -17,6 +17,8 @@ rendered properly in your Markdown viewer.

# MiniMax

> [MiniMax-M2](https://huggingface.co/docs/transformers/en/model_doc/minimax_m2) was released on 2025-10-27. We recommend using MiniMax-M2 for most use cases due to its better overall performance.

## Overview

The MiniMax-Text-01 model was proposed in [MiniMax-01: Scaling Foundation Models with Lightning Attention](https://huggingface.co/papers/2501.08313) by MiniMax, Aonian Li, Bangwei Gong, Bo Yang, Boji Shan, Chang Liu, Cheng Zhu, Chunhao Zhang, Congchao Guo, Da Chen, Dong Li, Enwei Jiao, Gengxin Li, Guojun Zhang, Haohai Sun, Houze Dong, Jiadai Zhu, Jiaqi Zhuang, Jiayuan Song, Jin Zhu, Jingtao Han, Jingyang Li, Junbin Xie, Junhao Xu, Junjie Yan, Kaishun Zhang, Kecheng Xiao, Kexi Kang, Le Han, Leyang Wang, Lianfei Yu, Liheng Feng, Lin Zheng, Linbo Chai, Long Xing, Meizhi Ju, Mingyuan Chi, Mozhi Zhang, Peikai Huang, Pengcheng Niu, Pengfei Li, Pengyu Zhao, Qi Yang, Qidi Xu, Qiexiang Wang, Qin Wang, Qiuhui Li, Ruitao Leng, Shengmin Shi, Shuqi Yu, Sichen Li, Songquan Zhu, Tao Huang, Tianrun Liang, Weigao Sun, Weixuan Sun, Weiyu Cheng, Wenkai Li, Xiangjun Song, Xiao Su, Xiaodong Han, Xinjie Zhang, Xinzhu Hou, Xu Min, Xun Zou, Xuyang Shen, Yan Gong, Yingjie Zhu, Yipeng Zhou, Yiran Zhong, Yongyi Hu, Yuanxiang Fan, Yue Yu, Yufeng Yang, Yuhao Li, Yunan Huang, Yunji Li, Yunpeng Huang, Yunzhi Xu, Yuxin Mao, Zehan Li, Zekang Li, Zewei Tao, Zewen Ying, Zhaoyang Cong, Zhen Qin, Zhenhua Fan, Zhihang Yu, Zhuo Jiang, Zijia Wu.
82 changes: 82 additions & 0 deletions docs/source/en/model_doc/minimax_m2.md
@@ -0,0 +1,82 @@
<!--Copyright 2025 the HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.

-->


# MiniMax-M2

## Overview

MiniMax-M2 is a compact, fast, and cost-effective mixture-of-experts (MoE) model with 230 billion total parameters, of which about 10 billion are active per token. It is built for strong performance on coding and agentic tasks while retaining powerful general intelligence, and its small activated-parameter footprint makes deployment and scaling easier.

For more details refer to the [release blog post](https://www.minimax.io/news/minimax-m2).
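The efficiency claim above comes from sparse expert routing: each token is dispatched to only a few experts, so only a fraction of the total parameters participates in any one forward pass. The sketch below is a generic top-k softmax router for illustration only (the function name and expert counts are hypothetical), not MiniMax-M2's actual routing code.

```python
import math


def top_k_route(gate_logits, k):
    """Pick the k highest-scoring experts and softmax-normalize their weights."""
    topk = sorted(range(len(gate_logits)), key=lambda i: gate_logits[i], reverse=True)[:k]
    exps = [math.exp(gate_logits[i]) for i in topk]
    total = sum(exps)
    return topk, [e / total for e in exps]


# One token's gate scores over 8 hypothetical experts; only 2 are activated,
# so only those 2 experts' parameters are used for this token.
experts, weights = top_k_route([0.1, 2.0, -0.5, 1.3, 0.0, -1.2, 0.7, 0.4], k=2)
print(experts)                            # winning expert indices
print([round(w, 3) for w in weights])     # their normalized routing weights
```

With 2 of 8 experts active, roughly a quarter of the expert parameters are touched per token; scaling the same idea up is how a 230B-parameter model can run with ~10B active parameters.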

## Usage examples

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model = AutoModelForCausalLM.from_pretrained("MiniMaxAI/MiniMax-M2", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-M2")
generation_config = GenerationConfig.from_pretrained("MiniMaxAI/MiniMax-M2")

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"},
]

# Move inputs to the model's device rather than hardcoding "cuda",
# since device_map="auto" decides the placement.
model_inputs = tokenizer.apply_chat_template(
    messages, return_tensors="pt", add_generation_prompt=True
).to(model.device)

generated_ids = model.generate(model_inputs, max_new_tokens=100, generation_config=generation_config)

# Decode only the newly generated tokens, skipping the prompt.
response = tokenizer.batch_decode(generated_ids[:, model_inputs.shape[1]:], skip_special_tokens=True)[0]
print(response)
```

## MiniMaxM2Config

[[autodoc]] MiniMaxM2Config

## MiniMaxM2ForCausalLM

[[autodoc]] MiniMaxM2ForCausalLM
- forward

## MiniMaxM2ForQuestionAnswering

[[autodoc]] MiniMaxM2ForQuestionAnswering
- forward

## MiniMaxM2Model

[[autodoc]] MiniMaxM2Model
- forward

## MiniMaxM2ForSequenceClassification

[[autodoc]] MiniMaxM2ForSequenceClassification
- forward

## MiniMaxM2ForTokenClassification

[[autodoc]] MiniMaxM2ForTokenClassification
- forward
1 change: 1 addition & 0 deletions src/transformers/models/__init__.py
@@ -216,6 +216,7 @@
from .mgp_str import *
from .mimi import *
from .minimax import *
from .minimax_m2 import *
from .ministral import *
from .mistral import *
from .mistral3 import *
2 changes: 2 additions & 0 deletions src/transformers/models/auto/configuration_auto.py
@@ -258,6 +258,7 @@
("mgp-str", "MgpstrConfig"),
("mimi", "MimiConfig"),
("minimax", "MiniMaxConfig"),
("minimax_m2", "MiniMaxM2Config"),
("ministral", "MinistralConfig"),
("mistral", "MistralConfig"),
("mistral3", "Mistral3Config"),
@@ -711,6 +712,7 @@
("mgp-str", "MGP-STR"),
("mimi", "Mimi"),
("minimax", "MiniMax"),
("minimax_m2", "MiniMax-M2"),
("ministral", "Ministral"),
("mistral", "Mistral"),
("mistral3", "Mistral3"),
5 changes: 5 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
@@ -259,6 +259,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("mgp-str", "MgpstrForSceneTextRecognition"),
("mimi", "MimiModel"),
("minimax", "MiniMaxModel"),
("minimax_m2", "MiniMaxM2Model"),
("ministral", "MinistralModel"),
("mistral", "MistralModel"),
("mistral3", "Mistral3Model"),
@@ -710,6 +711,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("mega", "MegaForCausalLM"),
("megatron-bert", "MegatronBertForCausalLM"),
("minimax", "MiniMaxForCausalLM"),
("minimax_m2", "MiniMaxM2ForCausalLM"),
("ministral", "MinistralForCausalLM"),
("mistral", "MistralForCausalLM"),
("mixtral", "MixtralForCausalLM"),
@@ -1272,6 +1274,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("mega", "MegaForSequenceClassification"),
("megatron-bert", "MegatronBertForSequenceClassification"),
("minimax", "MiniMaxForSequenceClassification"),
("minimax_m2", "MiniMaxM2ForSequenceClassification"),
("ministral", "MinistralForSequenceClassification"),
("mistral", "MistralForSequenceClassification"),
("mixtral", "MixtralForSequenceClassification"),
@@ -1372,6 +1375,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("mega", "MegaForQuestionAnswering"),
("megatron-bert", "MegatronBertForQuestionAnswering"),
("minimax", "MiniMaxForQuestionAnswering"),
("minimax_m2", "MiniMaxM2ForQuestionAnswering"),
("ministral", "MinistralForQuestionAnswering"),
("mistral", "MistralForQuestionAnswering"),
("mixtral", "MixtralForQuestionAnswering"),
@@ -1488,6 +1492,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("mega", "MegaForTokenClassification"),
("megatron-bert", "MegatronBertForTokenClassification"),
("minimax", "MiniMaxForTokenClassification"),
("minimax_m2", "MiniMaxM2ForTokenClassification"),
("ministral", "MinistralForTokenClassification"),
("mistral", "MistralForTokenClassification"),
("mixtral", "MixtralForTokenClassification"),
29 changes: 29 additions & 0 deletions src/transformers/models/minimax_m2/__init__.py
@@ -0,0 +1,29 @@
# coding=utf-8
# Copyright 2025 the HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure


if TYPE_CHECKING:
from .configuration_minimax_m2 import *
from .modeling_minimax_m2 import *
else:
import sys

_file = globals()["__file__"]
sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
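The `__init__.py` above defers the actual submodule imports via transformers' `_LazyModule`, so importing the package is cheap until an attribute is first accessed. As a rough illustration of the underlying idea (a module-level `__getattr__`, per PEP 562), not transformers' actual `_LazyModule` implementation — `make_lazy_module` and its mapping are hypothetical names for this sketch:

```python
import importlib
import types


def make_lazy_module(name, submodule_map):
    """Build a module that imports submodules only when an attribute is accessed.

    submodule_map maps real module names to the attribute names they provide.
    """
    mod = types.ModuleType(name)

    def __getattr__(attr):
        for sub, names in submodule_map.items():
            if attr in names:
                real = importlib.import_module(sub)  # deferred import happens here
                value = getattr(real, attr)
                setattr(mod, attr, value)  # cache so later lookups skip __getattr__
                return value
        raise AttributeError(f"module {name!r} has no attribute {attr!r}")

    # Module-level __getattr__ (PEP 562) is consulted when normal lookup fails.
    mod.__getattr__ = __getattr__
    return mod


lazy_json = make_lazy_module("lazy_json", {"json": ["dumps", "loads"]})
print(lazy_json.dumps({"a": 1}))  # json is imported only on this first access
```

The real `_LazyModule` additionally derives the attribute map from the package's files via `define_import_structure` and replaces the module in `sys.modules`, but the deferred-import mechanism is the same in spirit.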