From 99996fbe8a18300f9b6c08bc053098f90c5998aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?=
 <41315874+fumiama@users.noreply.github.com>
Date: Thu, 13 Apr 2023 21:32:08 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=AC=94=E8=AE=B0=E6=9C=AC?=
 =?UTF-8?q?=E3=80=81=E7=BF=BB=E8=AF=91=E5=B9=B6=E9=87=8D=E6=96=B0=E5=BC=95?=
 =?UTF-8?q?=E5=85=A5=E8=87=AA=E5=8A=A8push=20(#48)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* optimize: 精简未用到的配置项并在特征提取初步引入mps

* add cmd argument: --noautoopen

* fix: i18n

* fix

* fix

* add genlocale workflow

* add unit test

* fix

* fix

* fix

* 优化笔记本

* reintroduce Push changes

* disable genlocale on non-main branch

* 将笔记本checkout改为stable
---
 .github/workflows/genlocale.yml              | 12 +++-
 Retrieval_based_Voice_Conversion_WebUI.ipynb | 71 +++++++++++++++++---
 gui.py                                       |  6 +-
 locale/en_US.json                            |  7 +-
 locale/ja_JP.json                            | 11 +--
 locale/zh_CN.json                            |  8 +--
 6 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/genlocale.yml b/.github/workflows/genlocale.yml
index e64f6cd..619ccf6 100644
--- a/.github/workflows/genlocale.yml
+++ b/.github/workflows/genlocale.yml
@@ -1,5 +1,8 @@
 name: genlocale
-on: [ push ]
+on:
+  push:
+    branches:
+      - main
 jobs:
   golangci:
     name: genlocale
@@ -22,3 +25,10 @@ jobs:
           git config --local user.email '41898282+github-actions[bot]@users.noreply.github.com'
           git add --all
           git commit -m "🎨 同步 locale"
+
+      - name: Push changes
+        if: ${{ !github.head_ref }}
+        uses: ad-m/github-push-action@master
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          branch: main
diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb
index a47f3b4..956fed9 100644
--- a/Retrieval_based_Voice_Conversion_WebUI.ipynb
+++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb
@@ -58,7 +58,7 @@
       "source": [
         "#@title 克隆仓库\n",
         "\n",
-        "!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
+        "!git clone --depth=1 -b stable https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
         "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
         "!mkdir -p pretrained uvr5_weights"
       ],
@@ -80,11 +80,22 @@
       "execution_count": null,
       "outputs": []
     },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title 安装aria2\n",
+        "!apt -y install -qq aria2"
+      ],
+      "metadata": {
+        "id": "pqE0PrnuRqI2"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "code",
       "source": [
         "#@title 下载底模\n",
-        "!apt -y install -qq aria2\n",
         "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
         "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
         "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
@@ -96,12 +107,7 @@
         "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
         "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
         "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
-        "\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
-        "\n",
-        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth"
       ],
       "metadata": {
         "id": "UG3XpUwEomUz"
@@ -109,6 +115,31 @@
       "execution_count": null,
       "outputs": []
     },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title 下载人声分离模型\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth"
+      ],
+      "metadata": {
+        "id": "HugjmZqZRuiF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title 下载hubert_base\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
+      ],
+      "metadata": {
+        "id": "2RCaT9FTR0ej"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "code",
       "source": [
@@ -223,8 +254,12 @@
         "#@title 手动预处理（不推荐）\n",
         "#@markdown 模型名\n",
         "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 采样率\n",
+        "BITRATE = 48000  #@param {type:\"integer\"}\n",
+        "#@markdown 使用的进程数\n",
+        "THREADCOUNT = 8  #@param {type:\"integer\"}\n",
         "\n",
-        "!python3 trainset_preprocess_pipeline_print.py /content/dataset 48000 8 logs/{MODELNAME} True\n"
+        "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True\n"
       ],
       "metadata": {
         "id": "ZKAyuKb9J6dz"
@@ -238,8 +273,14 @@
         "#@title 手动提取特征（不推荐）\n",
         "#@markdown 模型名\n",
         "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 使用的进程数\n",
+        "THREADCOUNT = 8  #@param {type:\"integer\"}\n",
+        "#@markdown 音高提取算法\n",
+        "ALGO = \"harvest\"  #@param {type:\"string\"}\n",
         "\n",
-        "!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n"
+        "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n",
+        "\n",
+        "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}\n"
       ],
       "metadata": {
         "id": "CrxJqzAUKmPJ"
@@ -253,14 +294,22 @@
         "#@title 手动训练（不推荐）\n",
         "#@markdown 模型名\n",
         "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 使用的GPU\n",
+        "USEGPU = \"0\"  #@param {type:\"string\"}\n",
+        "#@markdown 批大小\n",
+        "BATCHSIZE = 32  #@param {type:\"integer\"}\n",
         "#@markdown 停止的epoch\n",
         "MODELEPOCH = 3200  #@param {type:\"integer\"}\n",
         "#@markdown 保存epoch间隔\n",
         "EPOCHSAVE = 100  #@param {type:\"integer\"}\n",
         "#@markdown 采样率\n",
         "MODELSAMPLE = \"48k\"  #@param {type:\"string\"}\n",
+        "#@markdown 是否缓存训练集\n",
+        "CACHEDATA = 1  #@param {type:\"integer\"}\n",
+        "#@markdown 是否仅保存最新的ckpt文件\n",
+        "ONLYLATEST = 0  #@param {type:\"integer\"}\n",
         "\n",
-        "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs 32 -g 0 -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l 0 -c 1\n"
+        "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}\n"
       ],
       "metadata": {
         "id": "IMLPLKOaKj58"
diff --git a/gui.py b/gui.py
index f2b3e2a..058f974 100644
--- a/gui.py
+++ b/gui.py
@@ -165,7 +165,7 @@ class GUI:
         layout=[
             [
                 sg.Frame(title=i18n('加载模型'),layout=[
-                    [sg.Input(default_text='TEMP\\hubert_base.pt',key='hubert_path'),sg.FileBrowse(i18n('Hubert File'))],
+                    [sg.Input(default_text='TEMP\\hubert_base.pt',key='hubert_path'),sg.FileBrowse(i18n('Hubert模型'))],
                     [sg.Input(default_text='TEMP\\atri.pth',key='pth_path'),sg.FileBrowse(i18n('选择.pth文件'))],
                     [sg.Input(default_text='TEMP\\added_IVF512_Flat_atri_baseline_src_feat.index',key='index_path'),sg.FileBrowse(i18n('选择.index文件'))],
                     [sg.Input(default_text='TEMP\\big_src_feature_atri.npy',key='npy_path'),sg.FileBrowse(i18n('选择.npy文件'))]
@@ -187,10 +187,10 @@ class GUI:
                     [sg.Text(i18n("采样长度")),sg.Slider(range=(0.1,3.0),key='block_time',resolution=0.1,orientation='h',default_value=1.0)],
                     [sg.Text(i18n("淡入淡出长度")),sg.Slider(range=(0.01,0.15),key='crossfade_length',resolution=0.01,orientation='h',default_value=0.08)],
                     [sg.Text(i18n("额外推理时长")),sg.Slider(range=(0.05,3.00),key='extra_time',resolution=0.01,orientation='h',default_value=0.05)],
-                    [sg.Checkbox(i18n('Input Noisereduce'),key='I_noise_reduce'),sg.Checkbox(i18n('Output Noisereduce'),key='O_noise_reduce')]
+                    [sg.Checkbox(i18n('输入降噪'),key='I_noise_reduce'),sg.Checkbox(i18n('输出降噪'),key='O_noise_reduce')]
                 ],title=i18n("性能设置"))
             ],
-            [sg.Button(i18n("开始音频转换"),key='start_vc'),sg.Button(i18n("停止音频转换"),key='stop_vc'),sg.Text(i18n("Infer Time(ms):")),sg.Text("0",key='infer_time')]
+            [sg.Button(i18n("开始音频转换"),key='start_vc'),sg.Button(i18n("停止音频转换"),key='stop_vc'),sg.Text(i18n("推理时间(ms):")),sg.Text("0",key='infer_time')]
         ]
         
         self.window=sg.Window("RVC - GUI",layout=layout)
diff --git a/locale/en_US.json b/locale/en_US.json
index 5b724d0..63324ad 100644
--- a/locale/en_US.json
+++ b/locale/en_US.json
@@ -76,6 +76,7 @@
     "点击查看交流、问题反馈群号": "Click to view the communication and problem feedback group number",
     "xxxxx": "xxxxx",
     "加载模型": "加载模型",
+    "Hubert模型": "Hubert File",
     "选择.pth文件": "选择.pth文件",
     "选择.index文件": "选择.index文件",
     "选择.npy文件": "选择.npy文件",
@@ -88,8 +89,10 @@
     "采样长度": "采样长度",
     "淡入淡出长度": "淡入淡出长度",
     "额外推理时长": "额外推理时长",
-    "输出降噪/Output Noisereduce": "输出降噪/Output Noisereduce",
+    "输入降噪": "Input Noisereduce",
+    "输出降噪": "Output Noisereduce",
     "性能设置": "性能设置",
     "开始音频转换": "开始音频转换",
-    "停止音频转换": "停止音频转换"
+    "停止音频转换": "停止音频转换",
+    "推理时间(ms):": "Infer Time(ms):"
 }
\ No newline at end of file
diff --git a/locale/ja_JP.json b/locale/ja_JP.json
index ddcfa09..80cf908 100644
--- a/locale/ja_JP.json
+++ b/locale/ja_JP.json
@@ -36,7 +36,7 @@
     "请指定说话人id": "話者IDを指定してください",
     "处理数据": "データ処理",
     "step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)": "ステップ2b: CPUを使用して音高を抽出する(モデルに音高がある場合)、GPUを使用して特徴を抽出する(カード番号を選択する)",
-    "以-分隔输入使用的卡号, 例如   0-1-2   使用卡0和卡1和卡2": "ハイフンで区切って使用するカード番号を入力します。例えば0-1-2はカード0、カード1、カード2を使用します" ,
+    "以-分隔输入使用的卡号, 例如   0-1-2   使用卡0和卡1和卡2": "ハイフンで区切って使用するカード番号を入力します。例えば0-1-2はカード0、カード1、カード2を使用します",
     "显卡信息": "カード情報",
     "提取音高使用的CPU进程数": "抽出に使用するCPUプロセス数",
     "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢": "音高抽出アルゴリズムの選択:歌声を入力する場合は、pmを使用して速度を上げることができます。CPUが低い場合はdioを使用して速度を上げることができます。harvestは品質が高く、精度が高いですが、遅いです。",
@@ -72,10 +72,11 @@
     "模型是否带音高指导,1是0否": "モデルに音高ガイドを付けるかどうか、1は付ける、0は付けない",
     "提取": "抽出",
     "招募音高曲线前端编辑器": "音高曲線フロントエンドエディターを募集",
-    "加开发群联系我xxxxx": "開発グループに参加して私に連絡してくださいxxxxx" ,
+    "加开发群联系我xxxxx": "開発グループに参加して私に連絡してくださいxxxxx",
     "点击查看交流、问题反馈群号": "クリックして交流、問題フィードバックグループ番号を表示",
     "xxxxx": "xxxxx",
     "加载模型": "モデルをロードする",
+    "Hubert模型": "Hubertモデル",
     "选择.pth文件": ".pthファイルを選択する",
     "选择.index文件": ".indexファイルを選択する",
     "选择.npy文件": ".npyファイルを選択する",
@@ -88,8 +89,10 @@
     "采样长度": "サンプル長",
     "淡入淡出长度": "フェードイン/フェードアウト長",
     "额外推理时长": "追加推論時間",
-    "输出降噪/Output Noisereduce": "出力ノイズリダクション",
+    "输入降噪": "入力ノイズリダクション",
+    "输出降噪": "出力ノイズリダクション",
     "性能设置": "パフォーマンス設定",
     "开始音频转换": "音声変換を開始する",
-    "停止音频转换": "音声変換を停止する"
+    "停止音频转换": "音声変換を停止する",
+    "推理时间(ms):": "推論時間(ms):"
 }
\ No newline at end of file
diff --git a/locale/zh_CN.json b/locale/zh_CN.json
index a3ab63b..4b1b672 100644
--- a/locale/zh_CN.json
+++ b/locale/zh_CN.json
@@ -76,7 +76,7 @@
     "点击查看交流、问题反馈群号": "点击查看交流、问题反馈群号",
     "xxxxx": "xxxxx",
     "加载模型": "加载模型",
-    "Hubert File":"Hubert模型",
+    "Hubert模型": "Hubert模型",
     "选择.pth文件": "选择.pth文件",
     "选择.index文件": "选择.index文件",
     "选择.npy文件": "选择.npy文件",
@@ -89,10 +89,10 @@
     "采样长度": "采样长度",
     "淡入淡出长度": "淡入淡出长度",
     "额外推理时长": "额外推理时长",
-    "Input Noisereduce":"输入降噪",
-    "Output Noisereduce": "输出降噪",
+    "输入降噪": "输入降噪",
+    "输出降噪": "输出降噪",
     "性能设置": "性能设置",
     "开始音频转换": "开始音频转换",
     "停止音频转换": "停止音频转换",
-    "Infer Time(ms):":"推理时间(ms):"
+    "推理时间(ms):": "推理时间(ms):"
 }
\ No newline at end of file