1
1
from multiprocessing import cpu_count
2
- import threading
2
+ import threading , pdb , librosa
3
3
from time import sleep
4
4
from subprocess import Popen
5
5
from time import sleep
17
17
warnings .filterwarnings ("ignore" )
18
18
torch .manual_seed (114514 )
19
19
from i18n import I18nAuto
20
+ import ffmpeg
20
21
21
22
i18n = I18nAuto ()
22
23
# 判断是否有能用来训练和加速推理的N卡
@@ -235,7 +236,7 @@ def vc_multi(
235
236
yield traceback .format_exc ()
236
237
237
238
238
- def uvr (model_name , inp_root , save_root_vocal , paths , save_root_ins ):
239
+ def uvr (model_name , inp_root , save_root_vocal , paths , save_root_ins , agg ):
239
240
infos = []
240
241
try :
241
242
inp_root = inp_root .strip (" " ).strip ('"' ).strip ("\n " ).strip ('"' ).strip (" " )
@@ -246,6 +247,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
246
247
save_root_ins .strip (" " ).strip ('"' ).strip ("\n " ).strip ('"' ).strip (" " )
247
248
)
248
249
pre_fun = _audio_pre_ (
250
+ agg = int (agg ),
249
251
model_path = os .path .join (weight_uvr5_root , model_name + ".pth" ),
250
252
device = device ,
251
253
is_half = is_half ,
@@ -254,10 +256,25 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
254
256
paths = [os .path .join (inp_root , name ) for name in os .listdir (inp_root )]
255
257
else :
256
258
paths = [path .name for path in paths ]
257
- for name in paths :
258
- inp_path = os .path .join (inp_root , name )
259
+ for path in paths :
260
+ inp_path = os .path .join (inp_root , path )
261
+ need_reformat = 1
262
+ done = 0
263
+ try :
264
+ info = ffmpeg .probe (inp_path , cmd = "ffprobe" )
265
+ if (info ["streams" ][0 ]["channels" ]== 2 and info ["streams" ][0 ]["sample_rate" ]== "44100" ):
266
+ need_reformat = 0
267
+ pre_fun ._path_audio_ (inp_path , save_root_ins , save_root_vocal )
268
+ done = 1
269
+ except :
270
+ need_reformat = 1
271
+ traceback .print_exc ()
272
+ if (need_reformat == 1 ):
273
+ tmp_path = "%s/%s.reformatted.wav" % (tmp ,os .path .basename (inp_path ))
274
+ os .system ("ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y" % (inp_path ,tmp_path ))
275
+ inp_path = tmp_path
259
276
try :
260
- pre_fun ._path_audio_ (inp_path , save_root_ins , save_root_vocal )
277
+ if ( done == 0 ): pre_fun ._path_audio_ (inp_path , save_root_ins , save_root_vocal )
261
278
infos .append ("%s->Success" % (os .path .basename (inp_path )))
262
279
yield "\n " .join (infos )
263
280
except :
@@ -1147,6 +1164,15 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
1147
1164
)
1148
1165
with gr .Column ():
1149
1166
model_choose = gr .Dropdown (label = i18n ("模型" ), choices = uvr5_names )
1167
+ agg = gr .Slider (
1168
+ minimum = 0 ,
1169
+ maximum = 20 ,
1170
+ step = 1 ,
1171
+ label = "人声提取激进程度" ,
1172
+ value = 10 ,
1173
+ interactive = True ,
1174
+ visible = False #先不开放调整
1175
+ )
1150
1176
opt_vocal_root = gr .Textbox (
1151
1177
label = i18n ("指定输出人声文件夹" ), value = "opt"
1152
1178
)
@@ -1161,6 +1187,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
1161
1187
opt_vocal_root ,
1162
1188
wav_inputs ,
1163
1189
opt_ins_root ,
1190
+ agg
1164
1191
],
1165
1192
[vc_output4 ],
1166
1193
)
@@ -1246,7 +1273,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
1246
1273
with gr .Row ():
1247
1274
save_epoch10 = gr .Slider (
1248
1275
minimum = 0 ,
1249
- maximum = 200 ,
1276
+ maximum = 50 ,
1250
1277
step = 1 ,
1251
1278
label = i18n ("保存频率save_every_epoch" ),
1252
1279
value = 5 ,
0 commit comments