chore(sync): merge dev into main (#1399)

* Update mdxnet.py

* Update modules.py

* Rename preprocess.py to vr.py

* Add files via upload

* deps: add av lib (#1391)

* Add files via upload

* chore(format): run black on dev (#1398)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

---------

Co-authored-by: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Co-authored-by: Hiroto N <hironow365@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
github-actions[bot] 2023-10-08 18:54:58 +08:00 committed by GitHub
parent 1fdbb94811
commit 1696c5356b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 76 additions and 35 deletions

View File

@ -1,3 +1,6 @@
import os
import traceback
import librosa import librosa
import numpy as np import numpy as np
import av import av
@ -47,10 +50,14 @@ def audio2(i, o, format, sr):
def load_audio(file, sr): def load_audio(file, sr):
try:
file = ( file = (
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
) # 防止小白拷路径头尾带了空格和"和回车 ) # 防止小白拷路径头尾带了空格和"和回车
if os.path.exists(file) == False:
raise RuntimeError(
"You input a wrong audio path that does not exists, please fix it!"
)
try:
with open(file, "rb") as f: with open(file, "rb") as f:
with BytesIO() as out: with BytesIO() as out:
audio2(f, out, "f32le", sr) audio2(f, out, "f32le", sr)
@ -62,5 +69,5 @@ def load_audio(file, sr):
audio = np.mean(audio, -1) audio = np.mean(audio, -1)
return librosa.resample(audio, orig_sr=file[0], target_sr=16000) return librosa.resample(audio, orig_sr=file[0], target_sr=16000)
except Exception as e: except:
raise RuntimeError(f"Failed to load audio: {e}") raise RuntimeError(traceback.format_exc())

View File

@ -216,16 +216,26 @@ class Predictor:
path_other = "%s/%s_others.wav" % (others_root, basename) path_other = "%s/%s_others.wav" % (others_root, basename)
sf.write(path_vocal, mix - opt, rate) sf.write(path_vocal, mix - opt, rate)
sf.write(path_other, opt, rate) sf.write(path_other, opt, rate)
opt_path_vocal = path_vocal[:-4] + ".%s" % format
opt_path_other = path_other[:-4] + ".%s" % format
if os.path.exists(path_vocal): if os.path.exists(path_vocal):
os.system( os.system(
"ffmpeg -i %s -vn %s -q:a 2 -y" "ffmpeg -i %s -vn %s -q:a 2 -y" % (path_vocal, opt_path_vocal)
% (path_vocal, path_vocal[:-4] + ".%s" % format)
) )
if os.path.exists(opt_path_vocal):
try:
os.remove(path_vocal)
except:
pass
if os.path.exists(path_other): if os.path.exists(path_other):
os.system( os.system(
"ffmpeg -i %s -vn %s -q:a 2 -y" "ffmpeg -i %s -vn %s -q:a 2 -y" % (path_other, opt_path_other)
% (path_other, path_other[:-4] + ".%s" % format)
) )
if os.path.exists(opt_path_other):
try:
os.remove(path_other)
except:
pass
class MDXNetDereverb: class MDXNetDereverb:
@ -242,5 +252,5 @@ class MDXNetDereverb:
self.pred = Predictor(self) self.pred = Predictor(self)
self.device = device self.device = device
def path_audio(self, input, vocal_root, others_root, format): def _path_audio_(self, input, vocal_root, others_root, format, is_hp3=False):
self.pred.prediction(input, vocal_root, others_root, format) self.pred.prediction(input, vocal_root, others_root, format)

View File

@ -9,7 +9,7 @@ import torch
from configs.config import Config from configs.config import Config
from infer.modules.uvr5.mdxnet import MDXNetDereverb from infer.modules.uvr5.mdxnet import MDXNetDereverb
from infer.modules.uvr5.preprocess import AudioPre, AudioPreDeEcho from infer.modules.uvr5.vr import AudioPre, AudioPreDeEcho
config = Config() config = Config()
@ -36,6 +36,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
device=config.device, device=config.device,
is_half=config.is_half, is_half=config.is_half,
) )
is_hp3 = "HP3" in model_name
if inp_root != "": if inp_root != "":
paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)] paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
else: else:
@ -52,7 +53,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
): ):
need_reformat = 0 need_reformat = 0
pre_fun._path_audio_( pre_fun._path_audio_(
inp_path, save_root_ins, save_root_vocal, format0 inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
) )
done = 1 done = 1
except: except:
@ -70,7 +71,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
inp_path = tmp_path inp_path = tmp_path
try: try:
if done == 0: if done == 0:
pre_fun.path_audio( pre_fun._path_audio_(
inp_path, save_root_ins, save_root_vocal, format0 inp_path, save_root_ins, save_root_vocal, format0
) )
infos.append("%s->Success" % (os.path.basename(inp_path))) infos.append("%s->Success" % (os.path.basename(inp_path)))

View File

@ -41,7 +41,9 @@ class AudioPre:
self.mp = mp self.mp = mp
self.model = model self.model = model
def _path_audio_(self, music_file, ins_root=None, vocal_root=None, format="flac"): def _path_audio_(
self, music_file, ins_root=None, vocal_root=None, format="flac", is_hp3=False
):
if ins_root is None and vocal_root is None: if ins_root is None and vocal_root is None:
return "No save root." return "No save root."
name = os.path.basename(music_file) name = os.path.basename(music_file)
@ -120,18 +122,22 @@ class AudioPre:
else: else:
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
logger.info("%s instruments done" % name) logger.info("%s instruments done" % name)
if is_hp3 == True:
head = "vocal_"
else:
head = "instrument_"
if format in ["wav", "flac"]: if format in ["wav", "flac"]:
sf.write( sf.write(
os.path.join( os.path.join(
ins_root, ins_root,
"instrument_{}_{}.{}".format(name, self.data["agg"], format), head + "{}_{}.{}".format(name, self.data["agg"], format),
), ),
(np.array(wav_instrument) * 32768).astype("int16"), (np.array(wav_instrument) * 32768).astype("int16"),
self.mp.param["sr"], self.mp.param["sr"],
) # ) #
else: else:
path = os.path.join( path = os.path.join(
ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"]) ins_root, head + "{}_{}.wav".format(name, self.data["agg"])
) )
sf.write( sf.write(
path, path,
@ -139,11 +145,18 @@ class AudioPre:
self.mp.param["sr"], self.mp.param["sr"],
) )
if os.path.exists(path): if os.path.exists(path):
os.system( opt_format_path = path[:-4] + ".%s" % format
"ffmpeg -i %s -vn %s -q:a 2 -y" os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
% (path, path[:-4] + ".%s" % format) if os.path.exists(opt_format_path):
) try:
os.remove(path)
except:
pass
if vocal_root is not None: if vocal_root is not None:
if is_hp3 == True:
head = "instrument_"
else:
head = "vocal_"
if self.data["high_end_process"].startswith("mirroring"): if self.data["high_end_process"].startswith("mirroring"):
input_high_end_ = spec_utils.mirroring( input_high_end_ = spec_utils.mirroring(
self.data["high_end_process"], v_spec_m, input_high_end, self.mp self.data["high_end_process"], v_spec_m, input_high_end, self.mp
@ -158,14 +171,14 @@ class AudioPre:
sf.write( sf.write(
os.path.join( os.path.join(
vocal_root, vocal_root,
"vocal_{}_{}.{}".format(name, self.data["agg"], format), head + "{}_{}.{}".format(name, self.data["agg"], format),
), ),
(np.array(wav_vocals) * 32768).astype("int16"), (np.array(wav_vocals) * 32768).astype("int16"),
self.mp.param["sr"], self.mp.param["sr"],
) )
else: else:
path = os.path.join( path = os.path.join(
vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"]) vocal_root, head + "{}_{}.wav".format(name, self.data["agg"])
) )
sf.write( sf.write(
path, path,
@ -173,10 +186,13 @@ class AudioPre:
self.mp.param["sr"], self.mp.param["sr"],
) )
if os.path.exists(path): if os.path.exists(path):
os.system( opt_format_path = path[:-4] + ".%s" % format
"ffmpeg -i %s -vn %s -q:a 2 -y" os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
% (path, path[:-4] + ".%s" % format) if os.path.exists(opt_format_path):
) try:
os.remove(path)
except:
pass
class AudioPreDeEcho: class AudioPreDeEcho:
@ -207,7 +223,7 @@ class AudioPreDeEcho:
self.model = model self.model = model
def _path_audio_( def _path_audio_(
self, music_file, vocal_root=None, ins_root=None, format="flac" self, music_file, vocal_root=None, ins_root=None, format="flac", is_hp3=False
): # 3个VR模型vocal和ins是反的 ): # 3个VR模型vocal和ins是反的
if ins_root is None and vocal_root is None: if ins_root is None and vocal_root is None:
return "No save root." return "No save root."
@ -306,10 +322,13 @@ class AudioPreDeEcho:
self.mp.param["sr"], self.mp.param["sr"],
) )
if os.path.exists(path): if os.path.exists(path):
os.system( opt_format_path = path[:-4] + ".%s" % format
"ffmpeg -i %s -vn %s -q:a 2 -y" os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
% (path, path[:-4] + ".%s" % format) if os.path.exists(opt_format_path):
) try:
os.remove(path)
except:
pass
if vocal_root is not None: if vocal_root is not None:
if self.data["high_end_process"].startswith("mirroring"): if self.data["high_end_process"].startswith("mirroring"):
input_high_end_ = spec_utils.mirroring( input_high_end_ = spec_utils.mirroring(
@ -340,7 +359,10 @@ class AudioPreDeEcho:
self.mp.param["sr"], self.mp.param["sr"],
) )
if os.path.exists(path): if os.path.exists(path):
os.system( opt_format_path = path[:-4] + ".%s" % format
"ffmpeg -i %s -vn %s -q:a 2 -y" os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
% (path, path[:-4] + ".%s" % format) if os.path.exists(opt_format_path):
) try:
os.remove(path)
except:
pass

View File

@ -55,6 +55,7 @@ uvicorn = "^0.21.1"
colorama = "^0.4.6" colorama = "^0.4.6"
torchcrepe = "0.0.20" torchcrepe = "0.0.20"
python-dotenv = "^1.0.0" python-dotenv = "^1.0.0"
av = "^10.0.0"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]