Skip to content

Commit ca8010b

Browse files
JingofXin, lwj-st, ChenJiahaoST
authored
Supported sft for vlm (#499)
Co-authored-by: lwj-st <liwenjian.vendor@sensetime.com>
Co-authored-by: Chenjiahao <chenjiahao@sensetime.com>
1 parent d706489 commit ca8010b

File tree

16 files changed

+102
-61
lines changed

16 files changed

+102
-61
lines changed

LazyLLM-Env

lazyllm/cli/install.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -181,36 +181,6 @@ def install(commands): # noqa C901
181181

182182
extra_pkgs = set()
183183

184-
want_vllm = any(p.startswith("vllm") for p in pkgs)
185-
want_llamafactory = any(p.startswith("lazyllm-llamafactory") for p in pkgs)
186-
187-
try:
188-
importlib.metadata.version("vllm")
189-
orig_vllm = True
190-
except importlib.metadata.PackageNotFoundError:
191-
orig_vllm = False
192-
193-
try:
194-
importlib.metadata.version("lazyllm-llamafactory")
195-
orig_llamafactory = True
196-
except importlib.metadata.PackageNotFoundError:
197-
orig_llamafactory = False
198-
199-
need_transformers = (
200-
(want_vllm and orig_llamafactory)
201-
or (want_llamafactory and orig_vllm)
202-
or (want_vllm and want_llamafactory)
203-
)
204-
205-
if need_transformers:
206-
try:
207-
tr_ver = importlib.metadata.version("transformers")
208-
except importlib.metadata.PackageNotFoundError:
209-
pass
210-
else:
211-
if tr_ver != "4.46.1":
212-
extra_pkgs.add("transformers==4.46.1")
213-
214184
for p in pkgs:
215185
if p.startswith("flash-attn"):
216186
try:

lazyllm/components/deploy/lmdeploy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def cmd(self, finetuned_model=None, base_model=None):
5858
f"base_model({base_model}) will be used")
5959
finetuned_model = base_model
6060

61-
model_type = ModelManager.get_model_type(finetuned_model)
61+
model_type = ModelManager.get_model_type(base_model or finetuned_model)
6262
if model_type == 'vlm':
6363
self.kw.pop("chat-template")
6464
else:

lazyllm/components/deploy/vllm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __init__(self, trust_remote_code=True, launcher=launchers.remote(ngpus=1), s
4545
'host': '0.0.0.0',
4646
'max-num-seqs': 256,
4747
'pipeline-parallel-size': 1,
48-
'max-num-batched-tokens': 64000,
48+
'max-num-batched-tokens': 128000,
4949
})
5050
self.trust_remote_code = trust_remote_code
5151
self.kw.check_and_update(kw)

lazyllm/components/finetune/llama_factory/model_mapping.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def match_longest_prefix(model_name):
4040
'index': 'index',
4141
'internlm': 'intern',
4242
'internlm2': 'intern2',
43+
'internvl': 'intern_vl',
4344
'llama-2': 'llama2',
4445
'llama-3': 'llama3',
4546
'llama-3.2': 'mllama',
@@ -64,7 +65,9 @@ def match_longest_prefix(model_name):
6465
'pixtral': 'pixtral',
6566
'qwen': 'qwen',
6667
'codeqwen': 'qwen',
68+
'qwen-vl': 'qwen2_vl',
6769
'qwen2-vl': 'qwen2_vl',
70+
'qwen2.5-vl': 'qwen2_vl',
6871
'solar': 'solar',
6972
'telechat': 'telechat',
7073
'vicuna': 'vicuna',

lazyllm/components/finetune/llama_factory/sft.yaml

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
### ModelArguments
22
model_name_or_path: internlm2-chat-7b
3+
trust_remote_code: true
34
adapter_name_or_path: null
45
adapter_folder: null
56
cache_dir: null
67
use_fast_tokenizer: true
78
resize_vocab: false
89
split_special_tokens: false
9-
new_special_tokens: null
1010
model_revision: main
1111
low_cpu_mem_usage: true
1212
quantization_bit: null
@@ -18,7 +18,6 @@ flash_attn: auto
1818
shift_attn: false
1919
mixture_of_depths: null
2020
use_unsloth: false
21-
# visual_inputs: false
2221
moe_aux_loss_coef: null
2322
disable_gradient_checkpointing: false
2423
upcast_layernorm: false
@@ -49,7 +48,6 @@ print_param_status: false
4948
template: null
5049
dataset: identity,alpaca_en_demo
5150
dataset_dir: lazyllm_temp_dir
52-
# split: train
5351
cutoff_len: 1024
5452
train_on_prompt: false
5553
streaming: false
@@ -172,7 +170,6 @@ gradient_checkpointing_kwargs: null
172170
include_inputs_for_metrics: false
173171
eval_do_concat_batches: true
174172
fp16_backend: auto
175-
evaluation_strategy: null
176173
push_to_hub_model_id: null
177174
push_to_hub_organization: null
178175
push_to_hub_token: null
@@ -185,8 +182,6 @@ ddp_timeout: 180000000
185182
torch_compile: false
186183
torch_compile_backend: null
187184
torch_compile_mode: null
188-
dispatch_batches: null
189-
split_batches: null
190185
include_tokens_per_second: false
191186
include_num_input_tokens_seen: false
192187
neftune_noise_alpha: null
@@ -254,7 +249,6 @@ stage: sft
254249
finetuning_type: lora
255250
use_llama_pro: false
256251
freeze_vision_tower: true
257-
train_mm_proj_only: false
258252
plot_loss: true
259253

260254
### GeneratingArguments

lazyllm/components/finetune/llamafactory.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,31 @@ def build_temp_dataset_info(self, datapaths):
116116
assert os.path.isfile(datapath)
117117
file_name, _ = os.path.splitext(os.path.basename(datapath))
118118
temp_dataset_dict[file_name] = {'file_name': datapath}
119+
formatting = 'alpaca'
120+
try:
121+
with open(datapath, 'r', encoding='utf-8') as file:
122+
data = json.load(file)
123+
if 'messages' in data[0]:
124+
formatting = 'sharegpt'
125+
media_types = []
126+
for media in ['images', 'videos', 'audios']:
127+
if media in data[0]:
128+
media_types.append(media)
129+
if media_types:
130+
columns = {item: item for item in media_types}
131+
columns.update({"messages": "messages"})
132+
temp_dataset_dict[file_name].update({
133+
"tags": {
134+
"role_tag": "role",
135+
"content_tag": "content",
136+
"user_tag": "user",
137+
"assistant_tag": "assistant"
138+
},
139+
"columns": columns
140+
})
141+
except Exception:
142+
pass
143+
temp_dataset_dict[file_name].update({'formatting': formatting})
119144
self.temp_dataset_info_path = os.path.join(self.temp_folder, 'dataset_info.json')
120145
with open(self.temp_dataset_info_path, 'w') as json_file:
121146
json.dump(temp_dataset_dict, json_file, indent=4)
@@ -149,7 +174,7 @@ def cmd(self, trainset, valset=None) -> str:
149174
random_value = random.randint(1000, 9999)
150175
self.log_file_path = f'{self.target_path}/train_log_{formatted_date}_{random_value}.log'
151176

152-
cmds = f'llamafactory-cli train {self.temp_yaml_file}'
177+
cmds = f'export DISABLE_VERSION_CHECK=1 && llamafactory-cli train {self.temp_yaml_file}'
153178
cmds += f' 2>&1 | tee {self.log_file_path}'
154179
if self.temp_export_yaml_file:
155180
cmds += f' && llamafactory-cli export {self.temp_export_yaml_file}'

lazyllm/components/text_to_speech/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
class TTSDeploy:
77

88
def __new__(cls, name, **kwarg):
9+
name = name.lower()
910
if name == 'bark':
1011
return BarkDeploy(**kwarg)
11-
elif name == 'ChatTTS':
12+
elif name in ('chattts', 'chattts-new'):
1213
return ChatTTSDeploy(**kwarg)
1314
elif name.startswith('musicgen'):
1415
return MusicGenDeploy(**kwarg)

lazyllm/components/text_to_speech/chattts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def __call__(self, string):
5757
params_refine_text=params_refine_text,
5858
params_infer_code=params_infer_code,
5959
)
60-
file_path = sounds_to_files(speech[0], self.save_path)
60+
file_path = sounds_to_files(speech, self.save_path)
6161
return encode_query_with_filepaths(files=file_path)
6262

6363
@classmethod

lazyllm/components/utils/downloader/model_mapping.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,13 @@
182182
},
183183
"type": "tts"
184184
},
185+
"chattts-new": {
186+
"source": {
187+
"huggingface": "2Noise/ChatTTS",
188+
"modelscope": "AI-ModelScope/ChatTTS"
189+
},
190+
"type": "tts"
191+
},
185192
"internvl-chat-v1-5": {
186193
"source": {
187194
"huggingface": "OpenGVLab/InternVL-Chat-V1-5",
@@ -224,6 +231,13 @@
224231
},
225232
"type": "vlm"
226233
},
234+
"qwen2.5-vl-3b-instruct": {
235+
"source": {
236+
"huggingface": "Qwen/Qwen2.5-VL-3B-Instruct",
237+
"modelscope": "Qwen/Qwen2.5-VL-3B-Instruct"
238+
},
239+
"type": "vlm"
240+
},
227241
"musicgen-medium": {
228242
"source": {
229243
"huggingface": "facebook/musicgen-medium",

lazyllm/thirdparty/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
'huggingface_hub': 'huggingface-hub',
88
'jwt': 'PyJWT',
99
'rank_bm25': 'rank-bm25',
10-
'collie': 'collie-lm',
1110
'faiss': 'faiss-cpu',
1211
'flash_attn': 'flash-attn',
1312
'sklearn': 'scikit-learn'
@@ -78,7 +77,7 @@ def __getattribute__(self, __name):
7877
raise ImportError(err_msg)
7978

8079
modules = ['redis', 'huggingface_hub', 'jieba', 'modelscope', 'pandas', 'jwt', 'rank_bm25', 'redisvl', 'datasets',
81-
'deepspeed', 'fire', 'numpy', 'peft', 'torch', 'transformers', 'collie', 'faiss', 'flash_attn', 'google',
80+
'deepspeed', 'fire', 'numpy', 'peft', 'torch', 'transformers', 'faiss', 'flash_attn', 'google',
8281
'lightllm', 'vllm', 'ChatTTS', 'wandb', 'funasr', 'sklearn', 'torchvision', 'scipy', 'pymilvus',
8382
'sentence_transformers', 'gradio', 'chromadb', 'nltk', 'PIL', 'httpx', 'bm25s', 'kubernetes', 'pymongo',
8483
'rapidfuzz', 'FlagEmbedding', 'mcp', 'diffusers', 'pypdf', 'pptx', 'html2text', 'ebooklib', 'docx2txt',

pyproject.toml

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ redisvl = { version = ">=0.1.3", optional = true }
6262
datasets = { version = ">=2.18.0", optional = true }
6363
deepspeed = { version = ">=0.12.3", optional = true }
6464
fire = { version = ">=0.6.0", optional = true }
65-
peft = { version = ">=0.3.0", optional = true }
65+
peft = {version = "==0.14.0", optional = true}
6666
torch = { version = ">=2.1.2", optional = true }
67-
transformers = { version = ">=4.41.1", optional = true }
67+
transformers = {version = "==4.51.3", optional = true}
6868
collie-lm = { version = ">=1.0.7", optional = true }
6969
faiss-cpu = { version = ">=1.8.0", optional = true }
7070
google = { version = ">=3.0.0", optional = true }
@@ -76,13 +76,12 @@ vllm = {version = "==0.7.3", optional = true}
7676
wandb = { version = ">=0.17.0", optional = true }
7777
chattts = {version = "^0.1.1", optional = true}
7878
funasr = {version = "^1.1.4", optional = true}
79-
lazyllm-lmdeploy = {version = "==0.7.1rc0", optional = true}
8079
timm = {version = "^1.0.8", optional = true}
8180
diffusers = {version = "^0.30.0", optional = true}
8281
sortedcontainers = {version = "^2.4.0", optional = true}
8382
flash-attn = {version = "^2.7.4.post1", optional = true}
8483
lightllm = {version = "^0.0.1", optional = true}
85-
lazyllm-llamafactory = {version = "==0.9.1rc0", optional = true}
84+
lazyllm-llamafactory = {version = "==0.9.3.dev0", optional = true}
8685
rotary-embedding-torch = {version = "^0.8.3", optional = true}
8786
infinity-emb = {version = "==0.0.70", optional = true}
8887
ctranslate2 = {version = "^4.0.0", optional = true}
@@ -94,6 +93,9 @@ flagembedding = {version = "^1.3.4", optional = true}
9493
mcp = {version = ">=1.5.0", optional = true}
9594
pytesseract = {version = "^0.3.13", optional = true}
9695
openai-whisper = {version = "*", optional = true}
96+
qwen-vl-utils = {version = "^0.0.11", optional = true}
97+
accelerate = {version = "==1.6.0", optional = true}
98+
lmdeploy = {version = "==0.8.0", optional = true}
9799

98100
[tool.poetry.extras]
99101
standard = [
@@ -122,7 +124,7 @@ standard = [
122124
"wandb",
123125
"chattts",
124126
"funasr",
125-
"lazyllm-lmdeploy",
127+
"lmdeploy",
126128
"rotary-embedding-torch",
127129
"infinity-emb",
128130
"ctranslate2",
@@ -157,7 +159,7 @@ full = [
157159
"wandb",
158160
"chattts",
159161
"funasr",
160-
"lazyllm-lmdeploy",
162+
"lmdeploy",
161163
"timm",
162164
"diffusers",
163165
"redis",
@@ -175,9 +177,14 @@ full = [
175177
"ctranslate2",
176178
"optimum",
177179
"typer",
180+
"pymongo",
181+
"pymysql",
178182
"flagembedding",
179183
"mcp",
180-
"pytesseract"
184+
"pytesseract",
185+
"openai-whisper",
186+
"qwen-vl-utils",
187+
"accelerate"
181188
]
182189
alpaca-lora = [
183190
"appdirs",
@@ -208,7 +215,10 @@ llama-factory = [
208215
"tensorboard",
209216
"tensorboard-data-server",
210217
"torch",
211-
"transformers"
218+
"transformers",
219+
"accelerate",
220+
"qwen-vl-utils",
221+
"lmdeploy"
212222
]
213223
finetune-all = [
214224
"appdirs",
@@ -246,7 +256,7 @@ vllm = [
246256
lmdeploy = [
247257
"huggingface-hub",
248258
"modelscope",
249-
"lazyllm-lmdeploy"
259+
"lmdeploy"
250260
]
251261
lightllm = [
252262
"huggingface-hub",
@@ -263,7 +273,7 @@ deploy-all = [
263273
"modelscope",
264274
"vllm",
265275
"sentence-transformers",
266-
"lazyllm-lmdeploy",
276+
"lmdeploy",
267277
"infinity-emb"
268278
]
269279
multimodal = [

requirements.full.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ redisvl>=0.1.3
4949
datasets>=2.18.0
5050
deepspeed>=0.12.3
5151
fire>=0.6.0
52-
peft>=0.3.0
52+
peft==0.14.0
5353
torch>=2.1.2
54-
transformers>=4.41.1
54+
transformers==4.51.3
5555
collie-lm>=1.0.7
5656
faiss-cpu>=1.8.0
5757
google>=3.0.0
@@ -63,13 +63,12 @@ vllm==0.7.3
6363
wandb>=0.17.0
6464
chattts
6565
funasr
66-
lazyllm-lmdeploy==0.7.1rc0
6766
timm
6867
diffusers
6968
sortedcontainers
7069
flash-attn
7170
lightllm
72-
lazyllm-llamafactory==0.9.1rc0
71+
lazyllm-llamafactory==0.9.3.dev0
7372
rotary-embedding-torch
7473
infinity-emb==0.0.70
7574
ctranslate2
@@ -81,3 +80,6 @@ flagembedding
8180
mcp>=1.5.0
8281
pytesseract
8382
openai-whisper
83+
qwen-vl-utils
84+
accelerate==1.6.0
85+
lmdeploy==0.8.0

tests/advanced_tests/standard_test/test_deploy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def test_musicgen(self):
140140
assert len(res['files']) == 1
141141

142142
def test_chattts(self):
143-
m = lazyllm.TrainableModule('ChatTTS')
143+
m = lazyllm.TrainableModule('ChatTTS-new')
144144
m.update_server()
145145
r = m('你好啊,很高兴认识你。')
146146
res = decode_query_with_filepaths(r)

tests/advanced_tests/standard_test/test_engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def test_engine_infer_server_tts(self):
238238
token = '123'
239239
engine = LightEngine()
240240
engine.launch_localllm_infer_service()
241-
jobid, _ = engine.deploy_model(token, 'ChatTTS')
241+
jobid, _ = engine.deploy_model(token, 'ChatTTS-new')
242242
engine.infer_client.wait_ready(token, jobid)
243243
r = engine.get_infra_handle(token, jobid)
244244
assert isinstance(r, lazyllm.TrainableModule) and r._impl._get_deploy_tasks.flag

0 commit comments

Comments
 (0)