Skip to content

Commit af208d5

Browse files
authored
Add files via upload
1 parent a149107 commit af208d5

File tree

5 files changed

+352
-326
lines changed

5 files changed

+352
-326
lines changed

go-web.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
runtime\python.exe infer-web.py --pycmd runtime\python.exe
1+
runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897
22
pause

infer-web.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from multiprocessing import cpu_count
2-
import threading
2+
import threading,pdb,librosa
33
from time import sleep
44
from subprocess import Popen
55
from time import sleep
@@ -17,6 +17,7 @@
1717
warnings.filterwarnings("ignore")
1818
torch.manual_seed(114514)
1919
from i18n import I18nAuto
20+
import ffmpeg
2021

2122
i18n = I18nAuto()
2223
# 判断是否有能用来训练和加速推理的N卡
@@ -235,7 +236,7 @@ def vc_multi(
235236
yield traceback.format_exc()
236237

237238

238-
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
239+
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins,agg):
239240
infos = []
240241
try:
241242
inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
@@ -246,6 +247,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
246247
save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
247248
)
248249
pre_fun = _audio_pre_(
250+
agg=int(agg),
249251
model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
250252
device=device,
251253
is_half=is_half,
@@ -254,10 +256,25 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins):
254256
paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
255257
else:
256258
paths = [path.name for path in paths]
257-
for name in paths:
258-
inp_path = os.path.join(inp_root, name)
259+
for path in paths:
260+
inp_path = os.path.join(inp_root, path)
261+
need_reformat=1
262+
done=0
263+
try:
264+
info = ffmpeg.probe(inp_path, cmd="ffprobe")
265+
if(info["streams"][0]["channels"]==2 and info["streams"][0]["sample_rate"]=="44100"):
266+
need_reformat=0
267+
pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
268+
done=1
269+
except:
270+
need_reformat = 1
271+
traceback.print_exc()
272+
if(need_reformat==1):
273+
tmp_path="%s/%s.reformatted.wav"%(tmp,os.path.basename(inp_path))
274+
os.system("ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"%(inp_path,tmp_path))
275+
inp_path=tmp_path
259276
try:
260-
pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
277+
if(done==0):pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
261278
infos.append("%s->Success" % (os.path.basename(inp_path)))
262279
yield "\n".join(infos)
263280
except:
@@ -1147,6 +1164,15 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
11471164
)
11481165
with gr.Column():
11491166
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
1167+
agg = gr.Slider(
1168+
minimum=0,
1169+
maximum=20,
1170+
step=1,
1171+
label="人声提取激进程度",
1172+
value=10,
1173+
interactive=True,
1174+
visible=False#先不开放调整
1175+
)
11501176
opt_vocal_root = gr.Textbox(
11511177
label=i18n("指定输出人声文件夹"), value="opt"
11521178
)
@@ -1161,6 +1187,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
11611187
opt_vocal_root,
11621188
wav_inputs,
11631189
opt_ins_root,
1190+
agg
11641191
],
11651192
[vc_output4],
11661193
)
@@ -1246,7 +1273,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
12461273
with gr.Row():
12471274
save_epoch10 = gr.Slider(
12481275
minimum=0,
1249-
maximum=200,
1276+
maximum=50,
12501277
step=1,
12511278
label=i18n("保存频率save_every_epoch"),
12521279
value=5,

infer_uvr5.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414

1515
class _audio_pre_:
16-
def __init__(self, model_path, device, is_half):
16+
def __init__(self, agg,model_path, device, is_half):
1717
self.model_path = model_path
1818
self.device = device
1919
self.data = {
@@ -22,7 +22,7 @@ def __init__(self, model_path, device, is_half):
2222
"tta": False,
2323
# Constants
2424
"window_size": 512,
25-
"agg": 10,
25+
"agg": agg,
2626
"high_end_process": "mirroring",
2727
}
2828
nn_arch_sizes = [
@@ -139,7 +139,7 @@ def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
139139
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
140140
print("%s instruments done" % name)
141141
wavfile.write(
142-
os.path.join(ins_root, "instrument_{}.wav".format(name)),
142+
os.path.join(ins_root, "instrument_{}_{}.wav".format(name,self.data["agg"])),
143143
self.mp.param["sr"],
144144
(np.array(wav_instrument) * 32768).astype("int16"),
145145
) #
@@ -155,7 +155,7 @@ def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
155155
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
156156
print("%s vocals done" % name)
157157
wavfile.write(
158-
os.path.join(vocal_root, "vocal_{}.wav".format(name)),
158+
os.path.join(vocal_root, "vocal_{}_{}.wav".format(name,self.data["agg"])),
159159
self.mp.param["sr"],
160160
(np.array(wav_vocals) * 32768).astype("int16"),
161161
)

train_nsf_sim_cache_sid_load_pretrain.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
def main():
4646
# n_gpus = torch.cuda.device_count()
4747
os.environ["MASTER_ADDR"] = "localhost"
48-
os.environ["MASTER_PORT"] = "51515"
48+
os.environ["MASTER_PORT"] = "51545"
4949

5050
mp.spawn(
5151
run,

0 commit comments

Comments (0)