-
Notifications
You must be signed in to change notification settings - Fork 266
Open
Description
Hi there!
Thank you for the great work.
I am trying to train and tune the model on my custom dataset, which has 60-second MP4 video clips and corresponding QA.
I modified the config files, the data.py file, and everything else needed. I also set "mode" to "it" instead of "pt".
But when I start training the model, this is what I get:
2024-12-19T03:26:01 | utils.basic_utils: Train Epoch: [0] [ 0/528] eta: 2:38:49 lr: 0.000001 video-loss: nan time: 18.0487 data: 9.4180 max mem: 39278 res mem: 40794
2024-12-19T03:29:55 | utils.basic_utils: Train Epoch: [0] [100/528] eta: 0:17:46 lr: 0.000096 video-loss: nan time: 2.3156 data: 0.0019 max mem: 40735 res mem: 43122
2024-12-19T03:33:48 | utils.basic_utils: Train Epoch: [0] [200/528] eta: 0:13:11 lr: 0.000088 video-loss: nan time: 2.5185 data: 0.0018 max mem: 40735 res mem: 43122
Here is the config_7b_stage1.py file:
from configs.data import *

# ========================= data ==========================
# "${...}" strings are placeholders that refer to other config entries;
# presumably the project's config loader resolves them -- TODO confirm.
# `available_corpus` is not defined in this file (assumed to come from
# configs.data).
train_corpus = "LLAPSA"
train_file = "${available_corpus[${train_corpus}]}"
test_file = dict()
test_types = []
num_workers = 6

stop_key = None

# ========================= input ==========================
num_frames = 4
num_frames_test = 4
batch_size = 16
max_txt_l = 16

pre_text = False

# Image and video share the same text length and batch size values.
inputs = dict(
    image_res=224,
    video_input=dict(
        num_frames="${num_frames}",
        sample_type="rand",
        num_frames_test="${num_frames_test}",
        sample_type_test="middle",
        random_aug=False,
    ),
    max_txt_l=dict(image="${max_txt_l}", video="${max_txt_l}"),
    batch_size=dict(image="${batch_size}", video="${batch_size}"),
    batch_size_test=dict(image="${batch_size}", video="${batch_size}"),
)

# ========================= model ==========================
model = dict(
    vit_model="eva_clip_g",
    vit_model_path="/data/gauravs/video_chat/eva_vit_g.pth",
    q_former_model_path="/data/gauravs/video_chat/blip2_pretrained_flant5xxl.pth",
    llama_model_path="/data/gauravs/weights/vicuna-7b-v1.5",
    gpt_model_path="",
    img_size=224,
    num_query_token=32,
    drop_path_rate=0.0,
    use_grad_checkpoint=False,
    vit_precision="fp32",
    freeze_vit=True,
    freeze_mhra=False,  # mhra is left open (not frozen)
    freeze_qformer=True,
    low_resource=False,
    prompt_path="prompts/concise_description.txt",
    img_prompt_path="prompts/concise_image_description.txt",
    prompt_template="###Human: {} ###Assistant: ",
    max_txt_len="${max_txt_l}",  # stage2 should use a larger max_txt_len
    end_sym="###",
    # uniformerv2
    temporal_downsample=False,
    no_lmhra=True,
    double_lmhra=False,
    lmhra_reduction=2.0,
    gmhra_layers=8,
    gmhra_drop_path_rate=0.0,
    gmhra_dropout=0.5,
    # qformer
    extra_num_query_token=64,
)

# NOTE(review): gradient clipping is disabled (max_grad_norm=-1) while
# fp16 is enabled further down; worth checking first when the logged loss
# is NaN -- TODO confirm against the training loop.
optimizer = dict(
    opt="adamW",
    lr=1e-4,
    opt_betas=[0.9, 0.999],  # default betas
    weight_decay=0.02,
    max_grad_norm=-1,  # must be a positive float to take effect; -1 disables it
    # optional separate lr for selected modules, e.g. a larger lr for new ones
    different_lr=dict(enable=False, module_names=[], lr=1e-3),
)

scheduler = dict(
    sched="cosine",
    epochs=1,
    min_lr_multi=0.01,
    warmup_epochs=0.2,
)

evaluate = False
deep_fusion = False
evaluation = dict(
    eval_frame_ensemble="concat",  # one of: concat, max, mean, lse
    eval_x_only=False,
    k_test=128,
    eval_offload=True,  # move gpu tensors to cpu to save memory
)

fp16 = True
gradient_checkpointing = True

# ========================= wandb ==========================
wandb = dict(
    enable=False,
    # username or team name that stores the runs,
    # see https://docs.wandb.ai/ref/python/init
    entity="user",
    project="videochat",  # setup in your command line
)
dist_url = "env://"
device = "cuda"
# NOTE(review): the accompanying issue text says mode was set to "it",
# but this file sets "pt" -- confirm which value the run actually used.
mode = "pt"

# ========================= others ==========================
output_dir = ""  # output dir
resume = False  # when True, optimizer and scheduler states are loaded as well
debug = False
log_freq = 100
seed = 42

save_latest = True
auto_resume = True
pretrained_path = ""  # path to pretrained model weights, for resume only?
Can anyone please help me!! It is urgent!
Metadata
Metadata
Assignees
Labels
No labels