From b4c653142da945f0b93538c37b4b13c6874386dd Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 24 Apr 2023 20:35:56 +0800
Subject: [PATCH] Format code (#142)

Co-authored-by: github-actions[bot]
---
 export_onnx.py                        | 38 +++++++++++++++------------
 extract_f0_print.py                   |  2 +-
 gui.py                                | 17 +++++-------
 i18n.py                               |  6 +++--
 infer-web.py                          | 36 +++++++++++++++----------
 my_utils.py                           |  2 +-
 train/data_utils.py                   |  8 +++---
 trainset_preprocess_pipeline_print.py |  6 +++--
 8 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/export_onnx.py b/export_onnx.py
index 8b62b47..719aa7b 100644
--- a/export_onnx.py
+++ b/export_onnx.py
@@ -2,27 +2,29 @@ from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
 import torch
 
-if __name__ == '__main__':
-    MoeVS = True #whether the model is for MoeVoiceStudio (formerly MoeSS)
+if __name__ == "__main__":
+    MoeVS = True  # whether the model is for MoeVoiceStudio (formerly MoeSS)
 
-    ModelPath = "Shiroha/shiroha.pth" #model path
-    ExportedPath = "model.onnx" #output path
-    hidden_channels = 256 # hidden_channels, in preparation for the 768-dim vec
-    cpt = torch.load(ModelPath, map_location="cpu")
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
+    ModelPath = "Shiroha/shiroha.pth"  # model path
+    ExportedPath = "model.onnx"  # output path
+    hidden_channels = 256  # hidden_channels, in preparation for the 768-dim vec
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
     print(*cpt["config"])
-    test_phone = torch.rand(1, 200, hidden_channels) # hidden unit
-    test_phone_lengths = torch.tensor([200]).long() # hidden unit length (doesn't seem to matter)
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255) # f0 (in Hz)
-    test_pitchf = torch.rand(1, 200) # NSF f0
-    test_ds = torch.LongTensor([0]) # speaker ID
-    test_rnd = torch.rand(1, 192, 200) # noise (adds a random factor)
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit length (doesn't seem to matter)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # f0 (in Hz)
+    test_pitchf = torch.rand(1, 200)  # NSF f0
+    test_ds = torch.LongTensor([0])  # speaker ID
+    test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)
 
-    device = "cpu" #device used for export (does not affect using the model)
+    device = "cpu"  # device used for export (does not affect using the model)
 
     if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False)  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
         output_names = [
@@ -52,7 +54,9 @@ if __name__ == '__main__':
             output_names=output_names,
         )
     else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False)  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
         output_names = [
@@ -78,4 +82,4 @@ if __name__ == '__main__':
             verbose=False,
             input_names=input_names,
             output_names=output_names,
-        )
\ No newline at end of file
+        )
diff --git a/extract_f0_print.py b/extract_f0_print.py
index f848a0a..d2fef0f 100644
--- a/extract_f0_print.py
+++ b/extract_f0_print.py
@@ -35,7 +35,7 @@ class FeatureInput(object):
     def compute_f0(self, path, f0_method):
         # default resample type of librosa.resample is "soxr_hq".
         # Quality: soxr_vhq > soxr_hq
-        x, sr = librosa.load(path, self.fs)#, res_type='soxr_vhq'
+        x, sr = librosa.load(path, self.fs)  # , res_type='soxr_vhq'
         p_len = x.shape[0] // self.hop
         assert sr == self.fs
         if f0_method == "pm":
diff --git a/gui.py b/gui.py
index 6215435..4146c63 100644
--- a/gui.py
+++ b/gui.py
@@ -67,7 +67,7 @@ class RVC:
             print(e)
 
     def get_f0(self, x, f0_up_key, inp_f0=None):
-        x_pad=1
+        x_pad = 1
        f0_min = 50
         f0_max = 1100
         f0_mel_min = 1127 * np.log(1 + f0_min / 700)
@@ -137,7 +137,7 @@ class RVC:
         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         torch.cuda.synchronize()
         print(feats.shape)
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
             pitch, pitchf = self.get_f0(audio, self.f0_up_key)
             p_len = min(feats.shape[1], 13000, pitch.shape[0])  # too large will run out of GPU memory
         else:
@@ -146,7 +146,7 @@ class RVC:
         torch.cuda.synchronize()
         # print(feats.shape,pitch.shape)
         feats = feats[:, :p_len, :]
-        if(self.if_f0==1):
+        if self.if_f0 == 1:
             pitch = pitch[:p_len]
             pitchf = pitchf[:p_len]
             pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
@@ -155,17 +155,15 @@ class RVC:
         ii = 0  # sid
         sid = torch.LongTensor([ii]).to(device)
         with torch.no_grad():
-            if(self.if_f0==1):
+            if self.if_f0 == 1:
                 infered_audio = (
                     self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
                     .data.cpu()
                     .float()
                 )
             else:
-                infered_audio = (
-                    self.net_g.infer(feats, p_len, sid)[0][0, 0]
-                    .data.cpu()
-                    .float()
+                infered_audio = (
+                    self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float()
                 )
         torch.cuda.synchronize()
         return infered_audio
@@ -387,7 +385,7 @@ class GUI:
             self.config.pth_path,
             self.config.index_path,
             self.config.npy_path,
-            self.config.index_rate
+            self.config.index_rate,
         )
         self.input_wav: np.ndarray = np.zeros(
             self.extra_frame
@@ -511,7 +509,6 @@ class GUI:
             total_time = time.perf_counter() - start_time
             self.window["infer_time"].update(int(total_time * 1000))
             print("infer time:" + str(total_time))
-
 
     def get_devices(self, update: bool = True):
         """Get the device list"""
diff --git a/i18n.py b/i18n.py
index ec7a866..4cbbe5e 100644
--- a/i18n.py
+++ b/i18n.py
@@ -11,8 +11,10 @@ def load_language_list(language):
 
 class I18nAuto:
     def __init__(self, language=None):
-        if language in ['auto', None]:
-            language = locale.getdefaultlocale()[0]  # getlocale can't identify the system's language ((None, None))
+        if language in ["auto", None]:
+            language = locale.getdefaultlocale()[
+                0
+            ]  # getlocale can't identify the system's language ((None, None))
         if not os.path.exists(f"./i18n/{language}.json"):
             language = "en_US"
         self.language = language
diff --git a/infer-web.py b/infer-web.py
index 771a65c..a1cf3c6 100644
--- a/infer-web.py
+++ b/infer-web.py
@@ -119,7 +119,6 @@ for name in os.listdir(weight_uvr5_root):
         uvr5_names.append(name.replace(".pth", ""))
 
 
-
 def vc_single(
     sid,
     input_audio,
@@ -888,23 +887,27 @@ def change_info_(ckpt_path):
 
 from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
 from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
+
+
 def export_onnx(ModelPath, ExportedPath, MoeVS=True):
-    hidden_channels = 256 # hidden_channels, in preparation for the 768-dim vec
-    cpt = torch.load(ModelPath, map_location="cpu")
-    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
+    hidden_channels = 256  # hidden_channels, in preparation for the 768-dim vec
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
     print(*cpt["config"])
-    test_phone = torch.rand(1, 200, hidden_channels) # hidden unit
-    test_phone_lengths = torch.tensor([200]).long() # hidden unit length (doesn't seem to matter)
-    test_pitch = torch.randint(size=(1, 200), low=5, high=255) # f0 (in Hz)
-    test_pitchf = torch.rand(1, 200) # NSF f0
-    test_ds = torch.LongTensor([0]) # speaker ID
-    test_rnd = torch.rand(1, 192, 200) # noise (adds a random factor)
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit length (doesn't seem to matter)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # f0 (in Hz)
+    test_pitchf = torch.rand(1, 200)  # NSF f0
+    test_ds = torch.LongTensor([0])  # speaker ID
+    test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)
 
-    device = "cpu" #device used for export (does not affect using the model)
+    device = "cpu"  # device used for export (does not affect using the model)
 
     if MoeVS:
-        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False)  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
+        net_g = SynthesizerTrnMs256NSFsidM(
+            *cpt["config"], is_half=False
+        )  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
         output_names = [
@@ -934,7 +937,9 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
             output_names=output_names,
         )
     else:
-        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False)  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
+        net_g = SynthesizerTrnMs256NSFsidO(
+            *cpt["config"], is_half=False
+        )  # fp32 export (fp16 support in C++ requires manually rearranging memory, so fp16 is skipped for now)
         net_g.load_state_dict(cpt["weight"], strict=False)
         input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
         output_names = [
@@ -963,6 +968,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
         )
     return "Finished"
 
+
 with gr.Blocks() as app:
     gr.Markdown(
         value=i18n(
@@ -1443,7 +1449,9 @@ with gr.Blocks() as app:
             with gr.Row():
                 ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
             with gr.Row():
-                onnx_dir = gr.Textbox(label=i18n("Onnx输出路径"), value="", interactive=True)
+                onnx_dir = gr.Textbox(
+                    label=i18n("Onnx输出路径"), value="", interactive=True
+                )
             with gr.Row():
                 moevs = gr.Checkbox(label=i18n("MoeVS模型"), value=True)
                 infoOnnx = gr.Label(label="Null")
diff --git a/my_utils.py b/my_utils.py
index 8b7e427..776939d 100644
--- a/my_utils.py
+++ b/my_utils.py
@@ -18,4 +18,4 @@ def load_audio(file, sr):
     except Exception as e:
         raise RuntimeError(f"Failed to load audio: {e}")
 
-    return np.frombuffer(out, np.float32).flatten()
\ No newline at end of file
+    return np.frombuffer(out, np.float32).flatten()
diff --git a/train/data_utils.py b/train/data_utils.py
index 87a435f..6e00a7a 100644
--- a/train/data_utils.py
+++ b/train/data_utils.py
@@ -99,8 +99,8 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
                 )
             )
         audio_norm = audio
-#        audio_norm = audio / self.max_wav_value
-#        audio_norm = audio / np.abs(audio).max()
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
         audio_norm = audio_norm.unsqueeze(0)
         spec_filename = filename.replace(".wav", ".spec.pt")
@@ -291,8 +291,8 @@ class TextAudioLoader(torch.utils.data.Dataset):
                 )
             )
         audio_norm = audio
-#        audio_norm = audio / self.max_wav_value
-#        audio_norm = audio / np.abs(audio).max()
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
         audio_norm = audio_norm.unsqueeze(0)
         spec_filename = filename.replace(".wav", ".spec.pt")
diff --git a/trainset_preprocess_pipeline_print.py b/trainset_preprocess_pipeline_print.py
index f40309a..5da8781 100644
--- a/trainset_preprocess_pipeline_print.py
+++ b/trainset_preprocess_pipeline_print.py
@@ -61,7 +61,9 @@ class PreProcess:
             self.sr,
             tmp_audio.astype(np.float32),
         )
-        tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000)#, res_type="soxr_vhq"
+        tmp_audio = librosa.resample(
+            tmp_audio, orig_sr=self.sr, target_sr=16000
+        )  # , res_type="soxr_vhq"
         wavfile.write(
             "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
             16000,
@@ -72,7 +74,7 @@ class PreProcess:
 
         try:
             audio = load_audio(path, self.sr)
             # zero-phase digital filter causes pre-ringing noise...
-            # audio = signal.filtfilt(self.bh, self.ah, audio)
+            # audio = signal.filtfilt(self.bh, self.ah, audio)
             audio = signal.lfilter(self.bh, self.ah, audio)
             idx1 = 0
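
Usage note: the export_onnx.py script reformatted above writes an ONNX graph whose inputs are the names listed in input_names. Below is a minimal sketch, not part of this patch, of driving that graph from Python with onnxruntime; it assumes the MoeVS export (which takes the extra "rnd" input) and reuses the dummy shapes from the export script, and it reads the output name from the session rather than assuming one.

    # Minimal sketch (an assumption, not part of this patch): exercise the
    # exported graph with onnxruntime, mirroring the dummy export tensors.
    import numpy as np
    import onnxruntime

    sess = onnxruntime.InferenceSession(
        "model.onnx", providers=["CPUExecutionProvider"]  # ExportedPath above
    )
    n_frames, hidden_channels = 200, 256

    feeds = {
        "phone": np.random.rand(1, n_frames, hidden_channels).astype(np.float32),  # hidden units
        "phone_lengths": np.array([n_frames], dtype=np.int64),
        "pitch": np.random.randint(5, 255, size=(1, n_frames)).astype(np.int64),  # f0 (Hz)
        "pitchf": np.random.rand(1, n_frames).astype(np.float32),  # NSF f0
        "ds": np.array([0], dtype=np.int64),  # speaker ID
        "rnd": np.random.rand(1, 192, n_frames).astype(np.float32),  # noise
    }
    out_name = sess.get_outputs()[0].name  # avoid hard-coding the output name
    audio = sess.run([out_name], feeds)[0]
    print(audio.shape)

The non-MoeVS variant (SynthesizerTrnMs256NSFsidO) omits the "rnd" input; everything else follows the same shapes and dtypes as test_phone, test_pitch, test_pitchf, test_ds above.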