From 99996fbe8a18300f9b6c08bc053098f90c5998aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Thu, 13 Apr 2023 21:32:08 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=AC=94=E8=AE=B0=E6=9C=AC?= =?UTF-8?q?=E3=80=81=E7=BF=BB=E8=AF=91=E5=B9=B6=E9=87=8D=E6=96=B0=E5=BC=95?= =?UTF-8?q?=E5=85=A5=E8=87=AA=E5=8A=A8push=20(#48)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * optimize: 精简未用到的配置项并在特征提取初步引入mps * add cmd argument: --noautoopen * fix: i18n * fix * fix * add genlocale workflow * add unitest * fix * fix * fix * 优化笔记本 * reintroduce Push changes * disable genlocale on non-main branch * 将笔记本checkout改为stable --- .github/workflows/genlocale.yml | 12 +++- Retrieval_based_Voice_Conversion_WebUI.ipynb | 71 +++++++++++++++++--- gui.py | 6 +- locale/en_US.json | 7 +- locale/ja_JP.json | 11 +-- locale/zh_CN.json | 8 +-- 6 files changed, 90 insertions(+), 25 deletions(-) diff --git a/.github/workflows/genlocale.yml b/.github/workflows/genlocale.yml index e64f6cd..619ccf6 100644 --- a/.github/workflows/genlocale.yml +++ b/.github/workflows/genlocale.yml @@ -1,5 +1,8 @@ name: genlocale -on: [ push ] +on: + push: + branches: + - main jobs: golangci: name: genlocale @@ -22,3 +25,10 @@ jobs: git config --local user.email '41898282+github-actions[bot]@users.noreply.github.com' git add --all git commit -m "🎨 同步 locale" + + - name: Push changes + if: ${{ !github.head_ref }} + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: main diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb index a47f3b4..956fed9 100644 --- a/Retrieval_based_Voice_Conversion_WebUI.ipynb +++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb @@ -58,7 +58,7 @@ "source": [ "#@title 克隆仓库\n", "\n", - "!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n", + "!git clone --depth=1 -b stable https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n", "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", "!mkdir -p pretrained uvr5_weights" ], @@ -80,11 +80,22 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "code", + "source": [ + "#@title 安装aria2\n", + "!apt -y install -qq aria2" + ], + "metadata": { + "id": "pqE0PrnuRqI2" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ "#@title 下载底模\n", - "!apt -y install -qq aria2\n", "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n", "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n", "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n", @@ -96,12 +107,7 @@ "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n", "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n", "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n", - "\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n", - "\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth" ], "metadata": { "id": "UG3XpUwEomUz" @@ -109,6 +115,31 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "code", + "source": [ + "#@title 下载人声分离模型\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth" + ], + "metadata": { + "id": "HugjmZqZRuiF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title 下载hubert_base\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" + ], + "metadata": { + "id": "2RCaT9FTR0ej" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ @@ -223,8 +254,12 @@ "#@title 手动预处理(不推荐)\n", "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", + "#@markdown 采样率\n", + "BITRATE = 48000 #@param {type:\"integer\"}\n", + "#@markdown 使用的进程数\n", + "THREADCOUNT = 8 #@param {type:\"integer\"}\n", "\n", - "!python3 trainset_preprocess_pipeline_print.py /content/dataset 48000 8 logs/{MODELNAME} True\n" + "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True\n" ], "metadata": { "id": "ZKAyuKb9J6dz" @@ -238,8 +273,14 @@ "#@title 手动提取特征(不推荐)\n", "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", + "#@markdown 使用的进程数\n", + "THREADCOUNT = 8 #@param {type:\"integer\"}\n", + "#@markdown 音高提取算法\n", + "ALGO = \"harvest\" #@param {type:\"string\"}\n", "\n", - "!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n" + "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", + "\n", + "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}\n" ], "metadata": { "id": "CrxJqzAUKmPJ" @@ -253,14 +294,22 @@ "#@title 手动训练(不推荐)\n", "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", + "#@markdown 使用的GPU\n", + "USEGPU = \"0\" #@param {type:\"string\"}\n", + "#@markdown 批大小\n", + "BATCHSIZE = 32 #@param {type:\"integer\"}\n", "#@markdown 停止的epoch\n", "MODELEPOCH = 3200 #@param {type:\"integer\"}\n", "#@markdown 保存epoch间隔\n", "EPOCHSAVE = 100 #@param {type:\"integer\"}\n", "#@markdown 采样率\n", "MODELSAMPLE = \"48k\" #@param {type:\"string\"}\n", + "#@markdown 是否缓存训练集\n", + "CACHEDATA = 1 #@param {type:\"integer\"}\n", + "#@markdown 是否仅保存最新的ckpt文件\n", + "ONLYLATEST = 0 #@param {type:\"integer\"}\n", "\n", - "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs 32 -g 0 -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l 0 -c 1\n" + "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}\n" ], "metadata": { "id": "IMLPLKOaKj58" diff --git a/gui.py b/gui.py index f2b3e2a..058f974 100644 --- a/gui.py +++ b/gui.py @@ -165,7 +165,7 @@ class GUI: layout=[ [ sg.Frame(title=i18n('加载模型'),layout=[ - [sg.Input(default_text='TEMP\\hubert_base.pt',key='hubert_path'),sg.FileBrowse(i18n('Hubert File'))], + [sg.Input(default_text='TEMP\\hubert_base.pt',key='hubert_path'),sg.FileBrowse(i18n('Hubert模型'))], [sg.Input(default_text='TEMP\\atri.pth',key='pth_path'),sg.FileBrowse(i18n('选择.pth文件'))], [sg.Input(default_text='TEMP\\added_IVF512_Flat_atri_baseline_src_feat.index',key='index_path'),sg.FileBrowse(i18n('选择.index文件'))], [sg.Input(default_text='TEMP\\big_src_feature_atri.npy',key='npy_path'),sg.FileBrowse(i18n('选择.npy文件'))] @@ -187,10 +187,10 @@ class GUI: [sg.Text(i18n("采样长度")),sg.Slider(range=(0.1,3.0),key='block_time',resolution=0.1,orientation='h',default_value=1.0)], [sg.Text(i18n("淡入淡出长度")),sg.Slider(range=(0.01,0.15),key='crossfade_length',resolution=0.01,orientation='h',default_value=0.08)], [sg.Text(i18n("额外推理时长")),sg.Slider(range=(0.05,3.00),key='extra_time',resolution=0.01,orientation='h',default_value=0.05)], - [sg.Checkbox(i18n('Input Noisereduce'),key='I_noise_reduce'),sg.Checkbox(i18n('Output Noisereduce'),key='O_noise_reduce')] + [sg.Checkbox(i18n('输入降噪'),key='I_noise_reduce'),sg.Checkbox(i18n('输出降噪'),key='O_noise_reduce')] ],title=i18n("性能设置")) ], - [sg.Button(i18n("开始音频转换"),key='start_vc'),sg.Button(i18n("停止音频转换"),key='stop_vc'),sg.Text(i18n("Infer Time(ms):")),sg.Text("0",key='infer_time')] + [sg.Button(i18n("开始音频转换"),key='start_vc'),sg.Button(i18n("停止音频转换"),key='stop_vc'),sg.Text(i18n("推理时间(ms):")),sg.Text("0",key='infer_time')] ] self.window=sg.Window("RVC - GUI",layout=layout) diff --git a/locale/en_US.json b/locale/en_US.json index 5b724d0..63324ad 100644 --- a/locale/en_US.json +++ b/locale/en_US.json @@ -76,6 +76,7 @@ "点击查看交流、问题反馈群号": "Click to view the communication and problem feedback group number", "xxxxx": "xxxxx", "加载模型": "加载模型", + "Hubert模型": "Hubert File", "选择.pth文件": "选择.pth文件", "选择.index文件": "选择.index文件", "选择.npy文件": "选择.npy文件", @@ -88,8 +89,10 @@ "采样长度": "采样长度", "淡入淡出长度": "淡入淡出长度", "额外推理时长": "额外推理时长", - "输出降噪/Output Noisereduce": "输出降噪/Output Noisereduce", + "输入降噪": "Input Noisereduce", + "输出降噪": "Output Noisereduce", "性能设置": "性能设置", "开始音频转换": "开始音频转换", - "停止音频转换": "停止音频转换" + "停止音频转换": "停止音频转换", + "推理时间(ms):": "Infer Time(ms):" } \ No newline at end of file diff --git a/locale/ja_JP.json b/locale/ja_JP.json index ddcfa09..80cf908 100644 --- a/locale/ja_JP.json +++ b/locale/ja_JP.json @@ -36,7 +36,7 @@ "请指定说话人id": "話者IDを指定してください", "处理数据": "データ処理", "step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)": "ステップ2b: CPUを使用して音高を抽出する(モデルに音高がある場合)、GPUを使用して特徴を抽出する(カード番号を選択する)", - "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2": "ハイフンで区切って使用するカード番号を入力します。例えば0-1-2はカード0、カード1、カード2を使用します" , + "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2": "ハイフンで区切って使用するカード番号を入力します。例えば0-1-2はカード0、カード1、カード2を使用します", "显卡信息": "カード情報", "提取音高使用的CPU进程数": "抽出に使用するCPUプロセス数", "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢": "音高抽出アルゴリズムの選択:歌声を入力する場合は、pmを使用して速度を上げることができます。CPUが低い場合はdioを使用して速度を上げることができます。harvestは品質が高く、精度が高いですが、遅いです。", @@ -72,10 +72,11 @@ "模型是否带音高指导,1是0否": "モデルに音高ガイドを付けるかどうか、1は付ける、0は付けない", "提取": "抽出", "招募音高曲线前端编辑器": "音高曲線フロントエンドエディターを募集", - "加开发群联系我xxxxx": "開発グループに参加して私に連絡してくださいxxxxx" , + "加开发群联系我xxxxx": "開発グループに参加して私に連絡してくださいxxxxx", "点击查看交流、问题反馈群号": "クリックして交流、問題フィードバックグループ番号を表示", "xxxxx": "xxxxx", "加载模型": "モデルをロードする", + "Hubert模型": "Hubert模型", "选择.pth文件": ".pthファイルを選択する", "选择.index文件": ".indexファイルを選択する", "选择.npy文件": ".npyファイルを選択する", @@ -88,8 +89,10 @@ "采样长度": "サンプル長", "淡入淡出长度": "フェードイン/フェードアウト長", "额外推理时长": "追加推論時間", - "输出降噪/Output Noisereduce": "出力ノイズリダクション", + "输入降噪": "输入降噪", + "输出降噪": "输出降噪", "性能设置": "パフォーマンス設定", "开始音频转换": "音声変換を開始する", - "停止音频转换": "音声変換を停止する" + "停止音频转换": "音声変換を停止する", + "推理时间(ms):": "推理时间(ms):" } \ No newline at end of file diff --git a/locale/zh_CN.json b/locale/zh_CN.json index a3ab63b..4b1b672 100644 --- a/locale/zh_CN.json +++ b/locale/zh_CN.json @@ -76,7 +76,7 @@ "点击查看交流、问题反馈群号": "点击查看交流、问题反馈群号", "xxxxx": "xxxxx", "加载模型": "加载模型", - "Hubert File":"Hubert模型", + "Hubert模型": "Hubert模型", "选择.pth文件": "选择.pth文件", "选择.index文件": "选择.index文件", "选择.npy文件": "选择.npy文件", @@ -89,10 +89,10 @@ "采样长度": "采样长度", "淡入淡出长度": "淡入淡出长度", "额外推理时长": "额外推理时长", - "Input Noisereduce":"输入降噪", - "Output Noisereduce": "输出降噪", + "输入降噪": "输入降噪", + "输出降噪": "输出降噪", "性能设置": "性能设置", "开始音频转换": "开始音频转换", "停止音频转换": "停止音频转换", - "Infer Time(ms):":"推理时间(ms):" + "推理时间(ms):": "推理时间(ms):" } \ No newline at end of file