-
Notifications
You must be signed in to change notification settings - Fork 62
Open
Description
yaml 配置:
# llmc quantization config: GPTQ W8A8 fake-quant for Qwen2.5-VL-7B-Instruct,
# with vision and language towers configured separately.
# NOTE(review): indentation below is reconstructed — the pasted issue text had
# lost all leading whitespace; verify nesting against a working llmc config.
base:
  seed: &seed 0
model:
  # type: Qwen2VL
  type: Qwen2_5VL
  path: /mnt/data/junjun.zhao/models/Qwen2.5-VL-7B-Instruct
  # path: /mnt/data/junjun.zhao/models/Qwen2.5-VL-3B-Instruct
  # tokenizer_mode: fast
  torch_dtype: torch.float32
calib:
  # -- alternative text-only calibration set (kept for reference) --
  # name: wikitext2
  # download: True
  # n_samples: 128
  # path: calib data path
  # bs: 1
  # seq_len: 2048
  # preproc: wikitext2_gptq
  # seed: *seed
  name: custom_mm
  n_samples: 128
  download: False
  path: /mnt/data/junjun.zhao/datasets/VQA_mini
  apply_chat_template: True
  add_answer: True  # Default is False. If set to True, calib data will include answers.
  bs: 1
  seq_len: 512
  preproc: vlm_general
  padding: True
  seed: *seed
eval:
  # eval_pos: [pretrain, fake_quant]
  eval_pos: [fake_quant]
  type: vqa
  name: [mme]
  download: False
  path: /mnt/data/junjun.zhao/datasets/datasets/lmms-lab/MME
  # name: wikitext2
  # download: True
  # seq_len: 2048
  bs: 1
  inference_per_block: False
quant:
  method: GPTQ
  # quant_objects: [vision, language]  # default is [language]
  # quant_objects: [language]  # default is [language]
  vision:
    method: GPTQ
    weight:
      bit: 8
      symmetric: True
      granularity: per_channel
      group_size: -1
      calib_algo: mse
      mse_b_num: 2
    act:
      bit: 8
      symmetric: True
      granularity: per_token
      calib_algo: minmax
    special:
      actorder: True
      static_groups: False
      percdamp: 0.01
      blocksize: 128
      true_sequential: True
  language:
    method: GPTQ
    weight:
      bit: 8
      symmetric: True
      granularity: per_channel
      group_size: -1
      calib_algo: mse
      # mse_b_num: 2
    act:
      bit: 8
      symmetric: True
      granularity: per_token
      calib_algo: minmax
    special:
      actorder: True
      static_groups: False
      percdamp: 0.02
      blocksize: 128
      true_sequential: True
      quant_out: True
save:
  save_fake: True
  save_path: /mnt/data/junjun.zhao/saved_model/qwen2_5_vl_7b_gptq_w8a8_vlm_language/
  # save_path: /mnt/data/junjun.zhao/saved_model/qwen2_5_vl_3b_gptq_w8a8_vlm/

版本说明:
- Name: transformers Version: 4.57.0
- Name: lmms_eval Version: 0.5.0
修改了部分代码适配 transformers 和 lmms_eval 新版本
问题 1:
- quant_objects: [vision, language] : 这么配置的时候,只会走 language , debug 相关代码不会走 vision
- vision 和 language 分开配置: 在量化校准 calib 阶段可以走到 vision 和 language, 但是 deploy_fake_quant_model 的时候,只会 执行 vision , 没有 for 循环,执行不到 language
- deploy_fake_quant_model vision 时 把 Linear 替换为 EffcientFakeQuantLinear 时, merger.mlp. 也会报错
- 当前代码似乎不能 同时构建 vision 和 language 的带有 EffcientFakeQuantLinear 的 model
Metadata
Metadata
Assignees
Labels
No labels