Merge branch 'clean' of https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI into clean

2025-05-21 11:13:03 +08:00 · 2023-08-19 20:01:16 +09:00 · 2023-08-19 20:01:16 +09:00 · 055864cc90
commit 055864cc90
parent 92f18e2f81 cb42c6990b
3 changed files with 18 additions and 10 deletions
--- a/infer/modules/uvr5/mdxnet.py
+++ b/infer/modules/uvr5/mdxnet.py
@@ -13,7 +13,7 @@ cpu = torch.device("cpu")

 class ConvTDFNetTrim:
    def __init__(
-            self, device, model_name, target_name, L, dim_f, dim_t, n_fft, hop=1024
+        self, device, model_name, target_name, L, dim_f, dim_t, n_fft, hop=1024
    ):
        super(ConvTDFNetTrim, self).__init__()

@@ -83,7 +83,7 @@ def get_models(device, dim_f, dim_t, n_fft):
        dim_f=dim_f,
        dim_t=dim_t,
        n_fft=n_fft,
-        )
+    )


 class Predictor:
@@ -95,7 +95,11 @@ class Predictor:
        )
        self.model = ort.InferenceSession(
            os.path.join(args.onnx, self.model_.target_name + ".onnx"),
-            providers=["CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"],
+            providers=[
+                "CUDAExecutionProvider",
+                "DmlExecutionProvider",
+                "CPUExecutionProvider",
+            ],
        )
        print("onnx load done")

@@ -236,4 +240,4 @@ class MDXNetDereverb:
        self.device = device

    def path_audio(self, input, vocal_root, others_root, format):
-        self.pred.prediction(input, vocal_root, others_root, format)
+        self.pred.prediction(input, vocal_root, others_root, format)
--- a/infer/modules/uvr5/modules.py
+++ b/infer/modules/uvr5/modules.py
@@ -27,7 +27,9 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
            func = AudioPre if "DeEcho" not in model_name else AudioPreDeEcho
            pre_fun = func(
                agg=int(agg),
-                model_path=os.path.join(os.getenv("weight_uvr5_root"), model_name + ".pth"),
+                model_path=os.path.join(
+                    os.getenv("weight_uvr5_root"), model_name + ".pth"
+                ),
                device=config.device,
                is_half=config.is_half,
            )
@@ -54,7 +56,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
                need_reformat = 1
                traceback.print_exc()
            if need_reformat == 1:
-                tmp_path = "%s/%s.reformatted.wav" % (os.path.join("tmp"), os.path.basename(inp_path))
+                tmp_path = "%s/%s.reformatted.wav" % (
+                    os.path.join("tmp"),
+                    os.path.basename(inp_path),
+                )
                os.system(
                    "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
                    % (inp_path, tmp_path)
@@ -89,4 +94,3 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    yield "\n".join(infos)
-    
--- a/infer/modules/uvr5/preprocess.py
+++ b/infer/modules/uvr5/preprocess.py
@@ -205,7 +205,7 @@ class AudioPreDeEcho:
        self.model = model

    def _path_audio_(
-            self, music_file, vocal_root=None, ins_root=None, format="flac"
+        self, music_file, vocal_root=None, ins_root=None, format="flac"
    ):  # 3个VR模型vocal和ins是反的
        if ins_root is None and vocal_root is None:
            return "No save root."
@@ -222,7 +222,7 @@ class AudioPreDeEcho:
            if d == bands_n:  # high-end band
                (
                    X_wave[d],
-                            _,
+                    _,
                ) = librosa.core.load(  # 理论上librosa读取可能对某些音频有bug，应该上ffmpeg读取，但是太麻烦了弃坑
                    music_file,
                    bp["sr"],
@@ -341,4 +341,4 @@ class AudioPreDeEcho:
                    os.system(
                        "ffmpeg -i %s -vn %s -q:a 2 -y"
                        % (path, path[:-4] + ".%s" % format)
-                    )
+                    )