Format code (#142)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-05-14 15:59:07 +08:00 · 2023-04-24 20:35:56 +08:00 · 2023-04-24 20:35:56 +08:00 · b4c653142d
commit b4c653142d
parent 376bd31c19
8 changed files with 64 additions and 51 deletions
--- a/export_onnx.py
+++ b/export_onnx.py
@@ -2,27 +2,29 @@ from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
 import torch

-if __name__ == '__main__':
-    MoeVS = True #模型是否为MoeVoiceStudio（原MoeSS）使用
+if __name__ == "__main__":
+    MoeVS = True  # 模型是否为MoeVoiceStudio（原MoeSS）使用

-    ModelPath = "Shiroha/shiroha.pth"  #模型路径
-    ExportedPath = "model.onnx"        #输出路径
-    hidden_channels = 256                                              # hidden_channels，为768Vec做准备
-    cpt = torch.load(ModelPath, map_location="cpu")                   
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]         # n_spk
+    ModelPath = "Shiroha/shiroha.pth"  # 模型路径
+    ExportedPath = "model.onnx"  # 输出路径
+    hidden_channels = 256  # hidden_channels，为768Vec做准备
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    print(*cpt["config"])

-    test_phone = torch.rand(1, 200, hidden_channels)                   # hidden unit
-    test_phone_lengths = torch.tensor([200]).long()                    # hidden unit 长度（貌似没啥用）
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255)         # 基频（单位赫兹）
-    test_pitchf = torch.rand(1, 200)                                   # nsf基频
-    test_ds = torch.LongTensor([0])                                    # 说话人ID
-    test_rnd = torch.rand(1, 192, 200)                                 # 噪声（加入随机因子）
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度（貌似没啥用）
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频（单位赫兹）
+    test_pitchf = torch.rand(1, 200)  # nsf基频
+    test_ds = torch.LongTensor([0])  # 说话人ID
+    test_rnd = torch.rand(1, 192, 200)  # 噪声（加入随机因子）

-    device = "cpu"  #导出时设备（不影响使用模型）
+    device = "cpu"  # 导出时设备（不影响使用模型）

    if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False)   # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
        net_g.load_state_dict(cpt["weight"], strict=False)
        input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
        output_names = [
@@ -52,7 +54,9 @@ if __name__ == '__main__':
            output_names=output_names,
        )
    else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False)   # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
        net_g.load_state_dict(cpt["weight"], strict=False)
        input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
        output_names = [
@@ -78,4 +82,4 @@ if __name__ == '__main__':
            verbose=False,
            input_names=input_names,
            output_names=output_names,
-        )
+        )
--- a/extract_f0_print.py
+++ b/extract_f0_print.py
@@ -35,7 +35,7 @@ class FeatureInput(object):
    def compute_f0(self, path, f0_method):
        # default resample type of librosa.resample is "soxr_hq".
        # Quality: soxr_vhq > soxr_hq
-        x, sr = librosa.load(path, self.fs)#, res_type='soxr_vhq'
+        x, sr = librosa.load(path, self.fs)  # , res_type='soxr_vhq'
        p_len = x.shape[0] // self.hop
        assert sr == self.fs
        if f0_method == "pm":
--- a/gui.py
+++ b/gui.py
@@ -67,7 +67,7 @@ class RVC:
            print(e)

    def get_f0(self, x, f0_up_key, inp_f0=None):
-        x_pad=1
+        x_pad = 1
        f0_min = 50
        f0_max = 1100
        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
@@ -137,7 +137,7 @@ class RVC:
        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
        torch.cuda.synchronize()
        print(feats.shape)
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
            pitch, pitchf = self.get_f0(audio, self.f0_up_key)
            p_len = min(feats.shape[1], 13000, pitch.shape[0])  # 太大了爆显存
        else:
@@ -146,7 +146,7 @@ class RVC:
        torch.cuda.synchronize()
        # print(feats.shape,pitch.shape)
        feats = feats[:, :p_len, :]
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
            pitch = pitch[:p_len]
            pitchf = pitchf[:p_len]
            pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
@@ -155,17 +155,15 @@ class RVC:
        ii = 0  # sid
        sid = torch.LongTensor([ii]).to(device)
        with torch.no_grad():
-            if(self.if_f0==1):
+            if self.if_f0 == 1:
                infered_audio = (
                    self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
                    .data.cpu()
                    .float()
                )
            else:
-                 infered_audio = (
-                    self.net_g.infer(feats, p_len, sid)[0][0, 0]
-                    .data.cpu()
-                    .float()
+                infered_audio = (
+                    self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float()
                )
        torch.cuda.synchronize()
        return infered_audio
@@ -387,7 +385,7 @@ class GUI:
            self.config.pth_path,
            self.config.index_path,
            self.config.npy_path,
-            self.config.index_rate
+            self.config.index_rate,
        )
        self.input_wav: np.ndarray = np.zeros(
            self.extra_frame
@@ -511,7 +509,6 @@ class GUI:
        total_time = time.perf_counter() - start_time
        self.window["infer_time"].update(int(total_time * 1000))
        print("infer time:" + str(total_time))
-        

    def get_devices(self, update: bool = True):
        """获取设备列表"""
--- a/i18n.py
+++ b/i18n.py
@@ -11,8 +11,10 @@ def load_language_list(language):

 class I18nAuto:
    def __init__(self, language=None):
-        if language in ['auto', None]:
-            language = locale.getdefaultlocale()[0]#getlocale can't identify the system's language ((None, None))
+        if language in ["auto", None]:
+            language = locale.getdefaultlocale()[
+                0
+            ]  # getlocale can't identify the system's language ((None, None))
        if not os.path.exists(f"./i18n/{language}.json"):
            language = "en_US"
        self.language = language
--- a/infer-web.py
+++ b/infer-web.py
@@ -119,7 +119,6 @@ for name in os.listdir(weight_uvr5_root):
        uvr5_names.append(name.replace(".pth", ""))


-
 def vc_single(
    sid,
    input_audio,
@@ -888,23 +887,27 @@ def change_info_(ckpt_path):

 from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
+
+
 def export_onnx(ModelPath, ExportedPath, MoeVS=True):
-    hidden_channels = 256                                              # hidden_channels，为768Vec做准备
-    cpt = torch.load(ModelPath, map_location="cpu")                   
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]         # n_spk
+    hidden_channels = 256  # hidden_channels，为768Vec做准备
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    print(*cpt["config"])

-    test_phone = torch.rand(1, 200, hidden_channels)                   # hidden unit
-    test_phone_lengths = torch.tensor([200]).long()                    # hidden unit 长度（貌似没啥用）
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255)         # 基频（单位赫兹）
-    test_pitchf = torch.rand(1, 200)                                   # nsf基频
-    test_ds = torch.LongTensor([0])                                    # 说话人ID
-    test_rnd = torch.rand(1, 192, 200)                                 # 噪声（加入随机因子）
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度（貌似没啥用）
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频（单位赫兹）
+    test_pitchf = torch.rand(1, 200)  # nsf基频
+    test_ds = torch.LongTensor([0])  # 说话人ID
+    test_rnd = torch.rand(1, 192, 200)  # 噪声（加入随机因子）

-    device = "cpu"  #导出时设备（不影响使用模型）
+    device = "cpu"  # 导出时设备（不影响使用模型）

    if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False)   # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
        net_g.load_state_dict(cpt["weight"], strict=False)
        input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
        output_names = [
@@ -934,7 +937,9 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
            output_names=output_names,
        )
    else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False)   # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32导出（C++要支持fp16必须手动将内存重新排列所以暂时不用fp16）
        net_g.load_state_dict(cpt["weight"], strict=False)
        input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
        output_names = [
@@ -963,6 +968,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
        )
    return "Finished"

+
 with gr.Blocks() as app:
    gr.Markdown(
        value=i18n(
@@ -1443,7 +1449,9 @@ with gr.Blocks() as app:
            with gr.Row():
                ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
            with gr.Row():
-                onnx_dir = gr.Textbox(label=i18n("Onnx输出路径"), value="", interactive=True)
+                onnx_dir = gr.Textbox(
+                    label=i18n("Onnx输出路径"), value="", interactive=True
+                )
            with gr.Row():
                moevs = gr.Checkbox(label=i18n("MoeVS模型"), value=True)
                infoOnnx = gr.Label(label="Null")
--- a/my_utils.py
+++ b/my_utils.py
@@ -18,4 +18,4 @@ def load_audio(file, sr):
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}")

-    return np.frombuffer(out, np.float32).flatten()
+    return np.frombuffer(out, np.float32).flatten()
--- a/train/data_utils.py
+++ b/train/data_utils.py
@@ -99,8 +99,8 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
                )
            )
        audio_norm = audio
-#        audio_norm = audio / self.max_wav_value
-#        audio_norm = audio / np.abs(audio).max()
+        #        audio_norm = audio / self.max_wav_value
+        #        audio_norm = audio / np.abs(audio).max()

        audio_norm = audio_norm.unsqueeze(0)
        spec_filename = filename.replace(".wav", ".spec.pt")
@@ -291,8 +291,8 @@ class TextAudioLoader(torch.utils.data.Dataset):
                )
            )
        audio_norm = audio
-#        audio_norm = audio / self.max_wav_value
-#        audio_norm = audio / np.abs(audio).max()
+        #        audio_norm = audio / self.max_wav_value
+        #        audio_norm = audio / np.abs(audio).max()

        audio_norm = audio_norm.unsqueeze(0)
        spec_filename = filename.replace(".wav", ".spec.pt")
--- a/trainset_preprocess_pipeline_print.py
+++ b/trainset_preprocess_pipeline_print.py
@@ -61,7 +61,9 @@ class PreProcess:
            self.sr,
            tmp_audio.astype(np.float32),
        )
-        tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000)#, res_type="soxr_vhq"
+        tmp_audio = librosa.resample(
+            tmp_audio, orig_sr=self.sr, target_sr=16000
+        )  # , res_type="soxr_vhq"
        wavfile.write(
            "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
            16000,
@@ -72,7 +74,7 @@ class PreProcess:
        try:
            audio = load_audio(path, self.sr)
            # zero phased digital filter cause pre-ringing noise...
-            # audio = signal.filtfilt(self.bh, self.ah, audio) 
+            # audio = signal.filtfilt(self.bh, self.ah, audio)
            audio = signal.lfilter(self.bh, self.ah, audio)

            idx1 = 0