mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-01-01 12:35:04 +08:00
chore(sync): merge dev into main (#1399)
* Update mdxnet.py
* Update modules.py
* Rename preprocess.py to vr.py
* Add files via upload
* deps: add av lib (#1391)
* Add files via upload
* chore(format): run black on dev (#1398)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

---------

Co-authored-by: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
Co-authored-by: Hiroto N <hironow365@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
1fdbb94811
commit
1696c5356b
@ -1,3 +1,6 @@
|
|||||||
|
import os
|
||||||
|
import traceback
|
||||||
|
|
||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import av
|
import av
|
||||||
@ -47,10 +50,14 @@ def audio2(i, o, format, sr):
|
|||||||
|
|
||||||
|
|
||||||
def load_audio(file, sr):
|
def load_audio(file, sr):
|
||||||
try:
|
|
||||||
file = (
|
file = (
|
||||||
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||||
) # 防止小白拷路径头尾带了空格和"和回车
|
) # 防止小白拷路径头尾带了空格和"和回车
|
||||||
|
if os.path.exists(file) == False:
|
||||||
|
raise RuntimeError(
|
||||||
|
"You input a wrong audio path that does not exists, please fix it!"
|
||||||
|
)
|
||||||
|
try:
|
||||||
with open(file, "rb") as f:
|
with open(file, "rb") as f:
|
||||||
with BytesIO() as out:
|
with BytesIO() as out:
|
||||||
audio2(f, out, "f32le", sr)
|
audio2(f, out, "f32le", sr)
|
||||||
@ -62,5 +69,5 @@ def load_audio(file, sr):
|
|||||||
audio = np.mean(audio, -1)
|
audio = np.mean(audio, -1)
|
||||||
return librosa.resample(audio, orig_sr=file[0], target_sr=16000)
|
return librosa.resample(audio, orig_sr=file[0], target_sr=16000)
|
||||||
|
|
||||||
except Exception as e:
|
except:
|
||||||
raise RuntimeError(f"Failed to load audio: {e}")
|
raise RuntimeError(traceback.format_exc())
|
||||||
|
@ -216,16 +216,26 @@ class Predictor:
|
|||||||
path_other = "%s/%s_others.wav" % (others_root, basename)
|
path_other = "%s/%s_others.wav" % (others_root, basename)
|
||||||
sf.write(path_vocal, mix - opt, rate)
|
sf.write(path_vocal, mix - opt, rate)
|
||||||
sf.write(path_other, opt, rate)
|
sf.write(path_other, opt, rate)
|
||||||
|
opt_path_vocal = path_vocal[:-4] + ".%s" % format
|
||||||
|
opt_path_other = path_other[:-4] + ".%s" % format
|
||||||
if os.path.exists(path_vocal):
|
if os.path.exists(path_vocal):
|
||||||
os.system(
|
os.system(
|
||||||
"ffmpeg -i %s -vn %s -q:a 2 -y"
|
"ffmpeg -i %s -vn %s -q:a 2 -y" % (path_vocal, opt_path_vocal)
|
||||||
% (path_vocal, path_vocal[:-4] + ".%s" % format)
|
|
||||||
)
|
)
|
||||||
|
if os.path.exists(opt_path_vocal):
|
||||||
|
try:
|
||||||
|
os.remove(path_vocal)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if os.path.exists(path_other):
|
if os.path.exists(path_other):
|
||||||
os.system(
|
os.system(
|
||||||
"ffmpeg -i %s -vn %s -q:a 2 -y"
|
"ffmpeg -i %s -vn %s -q:a 2 -y" % (path_other, opt_path_other)
|
||||||
% (path_other, path_other[:-4] + ".%s" % format)
|
|
||||||
)
|
)
|
||||||
|
if os.path.exists(opt_path_other):
|
||||||
|
try:
|
||||||
|
os.remove(path_other)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class MDXNetDereverb:
|
class MDXNetDereverb:
|
||||||
@ -242,5 +252,5 @@ class MDXNetDereverb:
|
|||||||
self.pred = Predictor(self)
|
self.pred = Predictor(self)
|
||||||
self.device = device
|
self.device = device
|
||||||
|
|
||||||
def path_audio(self, input, vocal_root, others_root, format):
|
def _path_audio_(self, input, vocal_root, others_root, format, is_hp3=False):
|
||||||
self.pred.prediction(input, vocal_root, others_root, format)
|
self.pred.prediction(input, vocal_root, others_root, format)
|
||||||
|
@ -9,7 +9,7 @@ import torch
|
|||||||
|
|
||||||
from configs.config import Config
|
from configs.config import Config
|
||||||
from infer.modules.uvr5.mdxnet import MDXNetDereverb
|
from infer.modules.uvr5.mdxnet import MDXNetDereverb
|
||||||
from infer.modules.uvr5.preprocess import AudioPre, AudioPreDeEcho
|
from infer.modules.uvr5.vr import AudioPre, AudioPreDeEcho
|
||||||
|
|
||||||
config = Config()
|
config = Config()
|
||||||
|
|
||||||
@ -36,6 +36,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
|
|||||||
device=config.device,
|
device=config.device,
|
||||||
is_half=config.is_half,
|
is_half=config.is_half,
|
||||||
)
|
)
|
||||||
|
is_hp3 = "HP3" in model_name
|
||||||
if inp_root != "":
|
if inp_root != "":
|
||||||
paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
|
paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
|
||||||
else:
|
else:
|
||||||
@ -52,7 +53,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
|
|||||||
):
|
):
|
||||||
need_reformat = 0
|
need_reformat = 0
|
||||||
pre_fun._path_audio_(
|
pre_fun._path_audio_(
|
||||||
inp_path, save_root_ins, save_root_vocal, format0
|
inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
|
||||||
)
|
)
|
||||||
done = 1
|
done = 1
|
||||||
except:
|
except:
|
||||||
@ -70,7 +71,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
|
|||||||
inp_path = tmp_path
|
inp_path = tmp_path
|
||||||
try:
|
try:
|
||||||
if done == 0:
|
if done == 0:
|
||||||
pre_fun.path_audio(
|
pre_fun._path_audio_(
|
||||||
inp_path, save_root_ins, save_root_vocal, format0
|
inp_path, save_root_ins, save_root_vocal, format0
|
||||||
)
|
)
|
||||||
infos.append("%s->Success" % (os.path.basename(inp_path)))
|
infos.append("%s->Success" % (os.path.basename(inp_path)))
|
||||||
|
@ -41,7 +41,9 @@ class AudioPre:
|
|||||||
self.mp = mp
|
self.mp = mp
|
||||||
self.model = model
|
self.model = model
|
||||||
|
|
||||||
def _path_audio_(self, music_file, ins_root=None, vocal_root=None, format="flac"):
|
def _path_audio_(
|
||||||
|
self, music_file, ins_root=None, vocal_root=None, format="flac", is_hp3=False
|
||||||
|
):
|
||||||
if ins_root is None and vocal_root is None:
|
if ins_root is None and vocal_root is None:
|
||||||
return "No save root."
|
return "No save root."
|
||||||
name = os.path.basename(music_file)
|
name = os.path.basename(music_file)
|
||||||
@ -120,18 +122,22 @@ class AudioPre:
|
|||||||
else:
|
else:
|
||||||
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
|
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
|
||||||
logger.info("%s instruments done" % name)
|
logger.info("%s instruments done" % name)
|
||||||
|
if is_hp3 == True:
|
||||||
|
head = "vocal_"
|
||||||
|
else:
|
||||||
|
head = "instrument_"
|
||||||
if format in ["wav", "flac"]:
|
if format in ["wav", "flac"]:
|
||||||
sf.write(
|
sf.write(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
ins_root,
|
ins_root,
|
||||||
"instrument_{}_{}.{}".format(name, self.data["agg"], format),
|
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
||||||
),
|
),
|
||||||
(np.array(wav_instrument) * 32768).astype("int16"),
|
(np.array(wav_instrument) * 32768).astype("int16"),
|
||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
) #
|
) #
|
||||||
else:
|
else:
|
||||||
path = os.path.join(
|
path = os.path.join(
|
||||||
ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"])
|
ins_root, head + "{}_{}.wav".format(name, self.data["agg"])
|
||||||
)
|
)
|
||||||
sf.write(
|
sf.write(
|
||||||
path,
|
path,
|
||||||
@ -139,11 +145,18 @@ class AudioPre:
|
|||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
)
|
)
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
os.system(
|
opt_format_path = path[:-4] + ".%s" % format
|
||||||
"ffmpeg -i %s -vn %s -q:a 2 -y"
|
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
|
||||||
% (path, path[:-4] + ".%s" % format)
|
if os.path.exists(opt_format_path):
|
||||||
)
|
try:
|
||||||
|
os.remove(path)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if vocal_root is not None:
|
if vocal_root is not None:
|
||||||
|
if is_hp3 == True:
|
||||||
|
head = "instrument_"
|
||||||
|
else:
|
||||||
|
head = "vocal_"
|
||||||
if self.data["high_end_process"].startswith("mirroring"):
|
if self.data["high_end_process"].startswith("mirroring"):
|
||||||
input_high_end_ = spec_utils.mirroring(
|
input_high_end_ = spec_utils.mirroring(
|
||||||
self.data["high_end_process"], v_spec_m, input_high_end, self.mp
|
self.data["high_end_process"], v_spec_m, input_high_end, self.mp
|
||||||
@ -158,14 +171,14 @@ class AudioPre:
|
|||||||
sf.write(
|
sf.write(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
vocal_root,
|
vocal_root,
|
||||||
"vocal_{}_{}.{}".format(name, self.data["agg"], format),
|
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
||||||
),
|
),
|
||||||
(np.array(wav_vocals) * 32768).astype("int16"),
|
(np.array(wav_vocals) * 32768).astype("int16"),
|
||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
path = os.path.join(
|
path = os.path.join(
|
||||||
vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"])
|
vocal_root, head + "{}_{}.wav".format(name, self.data["agg"])
|
||||||
)
|
)
|
||||||
sf.write(
|
sf.write(
|
||||||
path,
|
path,
|
||||||
@ -173,10 +186,13 @@ class AudioPre:
|
|||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
)
|
)
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
os.system(
|
opt_format_path = path[:-4] + ".%s" % format
|
||||||
"ffmpeg -i %s -vn %s -q:a 2 -y"
|
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
|
||||||
% (path, path[:-4] + ".%s" % format)
|
if os.path.exists(opt_format_path):
|
||||||
)
|
try:
|
||||||
|
os.remove(path)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class AudioPreDeEcho:
|
class AudioPreDeEcho:
|
||||||
@ -207,7 +223,7 @@ class AudioPreDeEcho:
|
|||||||
self.model = model
|
self.model = model
|
||||||
|
|
||||||
def _path_audio_(
|
def _path_audio_(
|
||||||
self, music_file, vocal_root=None, ins_root=None, format="flac"
|
self, music_file, vocal_root=None, ins_root=None, format="flac", is_hp3=False
|
||||||
): # 3个VR模型vocal和ins是反的
|
): # 3个VR模型vocal和ins是反的
|
||||||
if ins_root is None and vocal_root is None:
|
if ins_root is None and vocal_root is None:
|
||||||
return "No save root."
|
return "No save root."
|
||||||
@ -306,10 +322,13 @@ class AudioPreDeEcho:
|
|||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
)
|
)
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
os.system(
|
opt_format_path = path[:-4] + ".%s" % format
|
||||||
"ffmpeg -i %s -vn %s -q:a 2 -y"
|
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
|
||||||
% (path, path[:-4] + ".%s" % format)
|
if os.path.exists(opt_format_path):
|
||||||
)
|
try:
|
||||||
|
os.remove(path)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if vocal_root is not None:
|
if vocal_root is not None:
|
||||||
if self.data["high_end_process"].startswith("mirroring"):
|
if self.data["high_end_process"].startswith("mirroring"):
|
||||||
input_high_end_ = spec_utils.mirroring(
|
input_high_end_ = spec_utils.mirroring(
|
||||||
@ -340,7 +359,10 @@ class AudioPreDeEcho:
|
|||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
)
|
)
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
os.system(
|
opt_format_path = path[:-4] + ".%s" % format
|
||||||
"ffmpeg -i %s -vn %s -q:a 2 -y"
|
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
|
||||||
% (path, path[:-4] + ".%s" % format)
|
if os.path.exists(opt_format_path):
|
||||||
)
|
try:
|
||||||
|
os.remove(path)
|
||||||
|
except:
|
||||||
|
pass
|
@ -55,6 +55,7 @@ uvicorn = "^0.21.1"
|
|||||||
colorama = "^0.4.6"
|
colorama = "^0.4.6"
|
||||||
torchcrepe = "0.0.20"
|
torchcrepe = "0.0.20"
|
||||||
python-dotenv = "^1.0.0"
|
python-dotenv = "^1.0.0"
|
||||||
|
av = "^10.0.0"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user