This commit is contained in:
Ftps 2023-08-19 20:01:16 +09:00
commit 055864cc90
3 changed files with 18 additions and 10 deletions

View File

@@ -13,7 +13,7 @@ cpu = torch.device("cpu")
class ConvTDFNetTrim:
def __init__(
self, device, model_name, target_name, L, dim_f, dim_t, n_fft, hop=1024
self, device, model_name, target_name, L, dim_f, dim_t, n_fft, hop=1024
):
super(ConvTDFNetTrim, self).__init__()
@@ -83,7 +83,7 @@ def get_models(device, dim_f, dim_t, n_fft):
dim_f=dim_f,
dim_t=dim_t,
n_fft=n_fft,
)
)
class Predictor:
@@ -95,7 +95,11 @@ class Predictor:
)
self.model = ort.InferenceSession(
os.path.join(args.onnx, self.model_.target_name + ".onnx"),
providers=["CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"],
providers=[
"CUDAExecutionProvider",
"DmlExecutionProvider",
"CPUExecutionProvider",
],
)
print("onnx load done")

View File

@@ -27,7 +27,9 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
func = AudioPre if "DeEcho" not in model_name else AudioPreDeEcho
pre_fun = func(
agg=int(agg),
model_path=os.path.join(os.getenv("weight_uvr5_root"), model_name + ".pth"),
model_path=os.path.join(
os.getenv("weight_uvr5_root"), model_name + ".pth"
),
device=config.device,
is_half=config.is_half,
)
@@ -54,7 +56,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
need_reformat = 1
traceback.print_exc()
if need_reformat == 1:
tmp_path = "%s/%s.reformatted.wav" % (os.path.join("tmp"), os.path.basename(inp_path))
tmp_path = "%s/%s.reformatted.wav" % (
os.path.join("tmp"),
os.path.basename(inp_path),
)
os.system(
"ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
% (inp_path, tmp_path)
@@ -89,4 +94,3 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
if torch.cuda.is_available():
torch.cuda.empty_cache()
yield "\n".join(infos)

View File

@@ -205,7 +205,7 @@ class AudioPreDeEcho:
self.model = model
def _path_audio_(
self, music_file, vocal_root=None, ins_root=None, format="flac"
self, music_file, vocal_root=None, ins_root=None, format="flac"
): # 3个VR模型vocal和ins是反的
if ins_root is None and vocal_root is None:
return "No save root."
@@ -222,7 +222,7 @@ class AudioPreDeEcho:
if d == bands_n: # high-end band
(
X_wave[d],
_,
_,
) = librosa.core.load( # 理论上librosa读取可能对某些音频有bug应该上ffmpeg读取但是太麻烦了弃坑
music_file,
bp["sr"],