From 738e55f05c1bb22ae9f2f2ee4b546b2f2e3bbe7a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 26 Jan 2024 08:09:56 +0000 Subject: [PATCH] chore(format): run black on dev --- gui_v1.py | 10 +- infer-web.py | 156 ++++++++++++++----- infer/lib/infer_pack/models.py | 8 +- infer/lib/infer_pack/models_onnx.py | 8 +- infer/modules/ipex/attention.py | 114 +++++++------- infer/modules/ipex/hijacks.py | 42 +++-- infer/modules/train/extract_feature_print.py | 8 +- infer/modules/vc/modules.py | 22 +-- tools/app.py | 28 +++- tools/infer/infer-pm-index256.py | 1 + tools/infer/train-index-v2.py | 1 + tools/infer/train-index.py | 1 + tools/onnx_inference_demo.py | 4 +- tools/rvc_for_realtime.py | 10 +- tools/torchgate/__init__.py | 1 + 15 files changed, 266 insertions(+), 148 deletions(-) diff --git a/gui_v1.py b/gui_v1.py index d5cd27c..f30d8a3 100644 --- a/gui_v1.py +++ b/gui_v1.py @@ -877,11 +877,11 @@ if __name__ == "__main__": self.input_wav_denoise[-self.block_frame - 2 * self.zc :] )[160:] else: - self.input_wav_res[ - -160 * (indata.shape[0] // self.zc + 1) : - ] = self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[ - 160: - ] + self.input_wav_res[-160 * (indata.shape[0] // self.zc + 1) :] = ( + self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[ + 160: + ] + ) # infer if self.function == "vc": infer_wav = self.rvc.infer( diff --git a/infer-web.py b/infer-web.py index 5d94ee3..d6fb7db 100644 --- a/infer-web.py +++ b/infer-web.py @@ -93,7 +93,9 @@ if torch.cuda.is_available() or ngpu != 0: "90", "M4", "T4", - "TITAN","4060", "L", + "TITAN", + "4060", + "L", "6000", ] ): @@ -415,12 +417,16 @@ def get_pretrained_models(path_str, f0_str, sr2): sr2, ) return ( - "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) - if if_pretrained_generator_exist - else "", - "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2) - if if_pretrained_discriminator_exist - else "", + ( + "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) + if if_pretrained_generator_exist + else "" + ), + ( + "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2) + if if_pretrained_discriminator_exist + else "" + ), ) @@ -762,7 +768,9 @@ def train1key( if_save_every_weights18, version19, ) - yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")) + yield get_info_str( + i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log") + ) # step3b:训练索引 [get_info_str(_) for _ in train_index(exp_dir1, version19)] @@ -809,7 +817,9 @@ with gr.Blocks(title="RVC WebUI") as app: with gr.Row(): sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names)) with gr.Column(): - refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary") + refresh_button = gr.Button( + i18n("刷新音色列表和索引路径"), variant="primary" + ) clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary") spk_item = gr.Slider( minimum=0, @@ -828,14 +838,19 @@ with gr.Blocks(title="RVC WebUI") as app: with gr.Row(): with gr.Column(): vc_transform0 = gr.Number( - label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 + label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), + value=0, ) input_audio0 = gr.Textbox( - label=i18n("输入待处理音频文件路径(默认是正确格式示例)"), + label=i18n( + "输入待处理音频文件路径(默认是正确格式示例)" + ), placeholder="C:\\Users\\Desktop\\audio_example.wav", ) file_index1 = gr.Textbox( - label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), + label=i18n( + "特征检索库文件路径,为空则使用下拉的选择结果" + ), placeholder="C:\\Users\\Desktop\\model_example.index", interactive=True, ) @@ -848,9 +863,11 @@ with gr.Blocks(title="RVC WebUI") as app: label=i18n( "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" ), - choices=["pm", "harvest", "crepe", "rmvpe"] - if config.dml == False - else ["pm", "harvest", "rmvpe"], + choices=( + ["pm", "harvest", "crepe", "rmvpe"] + if config.dml == False + else ["pm", "harvest", "rmvpe"] + ), value="rmvpe", interactive=True, ) @@ -867,7 +884,9 @@ with gr.Blocks(title="RVC WebUI") as app: rms_mix_rate0 = gr.Slider( minimum=0, maximum=1, - label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), + label=i18n( + "输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络" + ), value=0.25, interactive=True, ) @@ -899,7 +918,9 @@ with gr.Blocks(title="RVC WebUI") as app: interactive=True, ) f0_file = gr.File( - label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), + label=i18n( + "F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调" + ), visible=False, ) @@ -919,7 +940,9 @@ with gr.Blocks(title="RVC WebUI") as app: but0 = gr.Button(i18n("转换"), variant="primary") with gr.Row(): vc_output1 = gr.Textbox(label=i18n("输出信息")) - vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) + vc_output2 = gr.Audio( + label=i18n("输出音频(右下角三个点,点了可以下载)") + ) but0.click( vc.vc_single, @@ -943,14 +966,19 @@ with gr.Blocks(title="RVC WebUI") as app: ) with gr.TabItem(i18n("批量推理")): gr.Markdown( - value=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ") + value=i18n( + "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. " + ) ) with gr.Row(): with gr.Column(): vc_transform1 = gr.Number( - label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 + label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), + value=0, + ) + opt_input = gr.Textbox( + label=i18n("指定输出文件夹"), value="opt" ) - opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt") file_index3 = gr.Textbox( label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), value="", @@ -965,9 +993,11 @@ with gr.Blocks(title="RVC WebUI") as app: label=i18n( "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" ), - choices=["pm", "harvest", "crepe", "rmvpe"] - if config.dml == False - else ["pm", "harvest", "rmvpe"], + choices=( + ["pm", "harvest", "crepe", "rmvpe"] + if config.dml == False + else ["pm", "harvest", "rmvpe"] + ), value="rmvpe", interactive=True, ) @@ -1002,7 +1032,9 @@ with gr.Blocks(title="RVC WebUI") as app: rms_mix_rate1 = gr.Slider( minimum=0, maximum=1, - label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), + label=i18n( + "输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络" + ), value=1, interactive=True, ) @@ -1019,7 +1051,9 @@ with gr.Blocks(title="RVC WebUI") as app: filter_radius1 = gr.Slider( minimum=0, maximum=7, - label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), + label=i18n( + ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音" + ), value=3, step=1, interactive=True, @@ -1033,11 +1067,14 @@ with gr.Blocks(title="RVC WebUI") as app: ) with gr.Row(): dir_input = gr.Textbox( - label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"), + label=i18n( + "输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)" + ), placeholder="C:\\Users\\Desktop\\input_vocal_dir", ) inputs = gr.File( - file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") + file_count="multiple", + label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"), ) with gr.Row(): @@ -1086,10 +1123,13 @@ with gr.Blocks(title="RVC WebUI") as app: placeholder="C:\\Users\\Desktop\\todo-songs", ) wav_inputs = gr.File( - file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") + file_count="multiple", + label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"), ) with gr.Column(): - model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names) + model_choose = gr.Dropdown( + label=i18n("模型"), choices=uvr5_names + ) agg = gr.Slider( minimum=0, maximum=20, @@ -1170,7 +1210,8 @@ with gr.Blocks(title="RVC WebUI") as app: ) with gr.Row(): trainset_dir4 = gr.Textbox( - label=i18n("输入训练文件夹路径"), value=i18n("E:\\语音音频+标注\\米津玄师\\src") + label=i18n("输入训练文件夹路径"), + value=i18n("E:\\语音音频+标注\\米津玄师\\src"), ) spk_id5 = gr.Slider( minimum=0, @@ -1189,11 +1230,17 @@ with gr.Blocks(title="RVC WebUI") as app: api_name="train_preprocess", ) with gr.Group(): - gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)")) + gr.Markdown( + value=i18n( + "step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)" + ) + ) with gr.Row(): with gr.Column(): gpus6 = gr.Textbox( - label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), + label=i18n( + "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2" + ), value=gpus, interactive=True, visible=F0GPUVisible, @@ -1281,7 +1328,9 @@ with gr.Blocks(title="RVC WebUI") as app: interactive=True, ) if_save_every_weights18 = gr.Radio( - label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + label=i18n( + "是否在每次保存时间点将最终小模型保存至weights文件夹" + ), choices=[i18n("是"), i18n("否")], value=i18n("否"), interactive=True, @@ -1313,7 +1362,9 @@ with gr.Blocks(title="RVC WebUI") as app: [f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15], ) gpus16 = gr.Textbox( - label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), + label=i18n( + "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2" + ), value=gpus, interactive=True, ) @@ -1373,8 +1424,12 @@ with gr.Blocks(title="RVC WebUI") as app: with gr.Group(): gr.Markdown(value=i18n("模型融合, 可用于测试音色融合")) with gr.Row(): - ckpt_a = gr.Textbox(label=i18n("A模型路径"), value="", interactive=True) - ckpt_b = gr.Textbox(label=i18n("B模型路径"), value="", interactive=True) + ckpt_a = gr.Textbox( + label=i18n("A模型路径"), value="", interactive=True + ) + ckpt_b = gr.Textbox( + label=i18n("B模型路径"), value="", interactive=True + ) alpha_a = gr.Slider( minimum=0, maximum=1, @@ -1396,7 +1451,10 @@ with gr.Blocks(title="RVC WebUI") as app: interactive=True, ) info__ = gr.Textbox( - label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True + label=i18n("要置入的模型信息"), + value="", + max_lines=8, + interactive=True, ) name_to_save0 = gr.Textbox( label=i18n("保存的模型名不带后缀"), @@ -1429,13 +1487,18 @@ with gr.Blocks(title="RVC WebUI") as app: api_name="ckpt_merge", ) # def merge(path1,path2,alpha1,sr,f0,info): with gr.Group(): - gr.Markdown(value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)")) + gr.Markdown( + value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)") + ) with gr.Row(): ckpt_path0 = gr.Textbox( label=i18n("模型路径"), value="", interactive=True ) info_ = gr.Textbox( - label=i18n("要改的模型信息"), value="", max_lines=8, interactive=True + label=i18n("要改的模型信息"), + value="", + max_lines=8, + interactive=True, ) name_to_save1 = gr.Textbox( label=i18n("保存的文件名, 默认空为和源文件同名"), @@ -1453,7 +1516,9 @@ with gr.Blocks(title="RVC WebUI") as app: api_name="ckpt_modify", ) with gr.Group(): - gr.Markdown(value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)")) + gr.Markdown( + value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)") + ) with gr.Row(): ckpt_path1 = gr.Textbox( label=i18n("模型路径"), value="", interactive=True @@ -1495,7 +1560,10 @@ with gr.Blocks(title="RVC WebUI") as app: interactive=True, ) info___ = gr.Textbox( - label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True + label=i18n("要置入的模型信息"), + value="", + max_lines=8, + interactive=True, ) but9 = gr.Button(i18n("提取"), variant="primary") info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) @@ -1511,7 +1579,9 @@ with gr.Blocks(title="RVC WebUI") as app: with gr.TabItem(i18n("Onnx导出")): with gr.Row(): - ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True) + ckpt_dir = gr.Textbox( + label=i18n("RVC模型路径"), value="", interactive=True + ) with gr.Row(): onnx_dir = gr.Textbox( label=i18n("Onnx输出路径"), value="", interactive=True diff --git a/infer/lib/infer_pack/models.py b/infer/lib/infer_pack/models.py index e489634..47aa485 100644 --- a/infer/lib/infer_pack/models.py +++ b/infer/lib/infer_pack/models.py @@ -400,13 +400,17 @@ class SineGen(torch.nn.Module): f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * ( idx + 2 ) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic - rad_values = (f0_buf / self.sampling_rate) % 1 ###%1意味着n_har的乘积无法后处理优化 + rad_values = ( + f0_buf / self.sampling_rate + ) % 1 ###%1意味着n_har的乘积无法后处理优化 rand_ini = torch.rand( f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device ) rand_ini[:, 0] = 0 rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini - tmp_over_one = torch.cumsum(rad_values, 1) # % 1 #####%1意味着后面的cumsum无法再优化 + tmp_over_one = torch.cumsum( + rad_values, 1 + ) # % 1 #####%1意味着后面的cumsum无法再优化 tmp_over_one *= upp tmp_over_one = F.interpolate( tmp_over_one.transpose(2, 1), diff --git a/infer/lib/infer_pack/models_onnx.py b/infer/lib/infer_pack/models_onnx.py index 97308ef..a6d321f 100644 --- a/infer/lib/infer_pack/models_onnx.py +++ b/infer/lib/infer_pack/models_onnx.py @@ -333,13 +333,17 @@ class SineGen(torch.nn.Module): f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * ( idx + 2 ) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic - rad_values = (f0_buf / self.sampling_rate) % 1 ###%1意味着n_har的乘积无法后处理优化 + rad_values = ( + f0_buf / self.sampling_rate + ) % 1 ###%1意味着n_har的乘积无法后处理优化 rand_ini = torch.rand( f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device ) rand_ini[:, 0] = 0 rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini - tmp_over_one = torch.cumsum(rad_values, 1) # % 1 #####%1意味着后面的cumsum无法再优化 + tmp_over_one = torch.cumsum( + rad_values, 1 + ) # % 1 #####%1意味着后面的cumsum无法再优化 tmp_over_one *= upp tmp_over_one = F.interpolate( tmp_over_one.transpose(2, 1), diff --git a/infer/modules/ipex/attention.py b/infer/modules/ipex/attention.py index 0cc2803..78a4775 100644 --- a/infer/modules/ipex/attention.py +++ b/infer/modules/ipex/attention.py @@ -62,12 +62,12 @@ def torch_bmm(input, mat2, *, out=None): ): # pylint: disable=invalid-name start_idx_2 = i2 * split_2_slice_size end_idx_2 = (i2 + 1) * split_2_slice_size - hidden_states[ - start_idx:end_idx, start_idx_2:end_idx_2 - ] = original_torch_bmm( - input[start_idx:end_idx, start_idx_2:end_idx_2], - mat2[start_idx:end_idx, start_idx_2:end_idx_2], - out=out, + hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = ( + original_torch_bmm( + input[start_idx:end_idx, start_idx_2:end_idx_2], + mat2[start_idx:end_idx, start_idx_2:end_idx_2], + out=out, + ) ) else: hidden_states[start_idx:end_idx] = original_torch_bmm( @@ -138,61 +138,67 @@ def scaled_dot_product_attention( start_idx_2 = i2 * split_2_slice_size end_idx_2 = (i2 + 1) * split_2_slice_size if no_shape_one: - hidden_states[ - start_idx:end_idx, start_idx_2:end_idx_2 - ] = original_scaled_dot_product_attention( - query[start_idx:end_idx, start_idx_2:end_idx_2], - key[start_idx:end_idx, start_idx_2:end_idx_2], - value[start_idx:end_idx, start_idx_2:end_idx_2], - attn_mask=attn_mask[ - start_idx:end_idx, start_idx_2:end_idx_2 - ] - if attn_mask is not None - else attn_mask, - dropout_p=dropout_p, - is_causal=is_causal, + hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = ( + original_scaled_dot_product_attention( + query[start_idx:end_idx, start_idx_2:end_idx_2], + key[start_idx:end_idx, start_idx_2:end_idx_2], + value[start_idx:end_idx, start_idx_2:end_idx_2], + attn_mask=( + attn_mask[start_idx:end_idx, start_idx_2:end_idx_2] + if attn_mask is not None + else attn_mask + ), + dropout_p=dropout_p, + is_causal=is_causal, + ) ) else: - hidden_states[ - :, start_idx:end_idx, start_idx_2:end_idx_2 - ] = original_scaled_dot_product_attention( - query[:, start_idx:end_idx, start_idx_2:end_idx_2], - key[:, start_idx:end_idx, start_idx_2:end_idx_2], - value[:, start_idx:end_idx, start_idx_2:end_idx_2], - attn_mask=attn_mask[ - :, start_idx:end_idx, start_idx_2:end_idx_2 - ] - if attn_mask is not None - else attn_mask, - dropout_p=dropout_p, - is_causal=is_causal, + hidden_states[:, start_idx:end_idx, start_idx_2:end_idx_2] = ( + original_scaled_dot_product_attention( + query[:, start_idx:end_idx, start_idx_2:end_idx_2], + key[:, start_idx:end_idx, start_idx_2:end_idx_2], + value[:, start_idx:end_idx, start_idx_2:end_idx_2], + attn_mask=( + attn_mask[ + :, start_idx:end_idx, start_idx_2:end_idx_2 + ] + if attn_mask is not None + else attn_mask + ), + dropout_p=dropout_p, + is_causal=is_causal, + ) ) else: if no_shape_one: - hidden_states[ - start_idx:end_idx - ] = original_scaled_dot_product_attention( - query[start_idx:end_idx], - key[start_idx:end_idx], - value[start_idx:end_idx], - attn_mask=attn_mask[start_idx:end_idx] - if attn_mask is not None - else attn_mask, - dropout_p=dropout_p, - is_causal=is_causal, + hidden_states[start_idx:end_idx] = ( + original_scaled_dot_product_attention( + query[start_idx:end_idx], + key[start_idx:end_idx], + value[start_idx:end_idx], + attn_mask=( + attn_mask[start_idx:end_idx] + if attn_mask is not None + else attn_mask + ), + dropout_p=dropout_p, + is_causal=is_causal, + ) ) else: - hidden_states[ - :, start_idx:end_idx - ] = original_scaled_dot_product_attention( - query[:, start_idx:end_idx], - key[:, start_idx:end_idx], - value[:, start_idx:end_idx], - attn_mask=attn_mask[:, start_idx:end_idx] - if attn_mask is not None - else attn_mask, - dropout_p=dropout_p, - is_causal=is_causal, + hidden_states[:, start_idx:end_idx] = ( + original_scaled_dot_product_attention( + query[:, start_idx:end_idx], + key[:, start_idx:end_idx], + value[:, start_idx:end_idx], + attn_mask=( + attn_mask[:, start_idx:end_idx] + if attn_mask is not None + else attn_mask + ), + dropout_p=dropout_p, + is_causal=is_causal, + ) ) else: return original_scaled_dot_product_attention( diff --git a/infer/modules/ipex/hijacks.py b/infer/modules/ipex/hijacks.py index d95fd61..fc75f0c 100644 --- a/infer/modules/ipex/hijacks.py +++ b/infer/modules/ipex/hijacks.py @@ -104,11 +104,11 @@ def return_xpu(device): return ( f"xpu:{device[-1]}" if isinstance(device, str) and ":" in device - else f"xpu:{device}" - if isinstance(device, int) - else torch.device("xpu") - if isinstance(device, torch.device) - else "xpu" + else ( + f"xpu:{device}" + if isinstance(device, int) + else torch.device("xpu") if isinstance(device, torch.device) else "xpu" + ) ) @@ -271,12 +271,16 @@ def ipex_hijacks(): "torch.batch_norm", lambda orig_func, input, weight, bias, *args, **kwargs: orig_func( input, - weight - if weight is not None - else torch.ones(input.size()[1], device=input.device), - bias - if bias is not None - else torch.zeros(input.size()[1], device=input.device), + ( + weight + if weight is not None + else torch.ones(input.size()[1], device=input.device) + ), + ( + bias + if bias is not None + else torch.zeros(input.size()[1], device=input.device) + ), *args, **kwargs, ), @@ -286,12 +290,16 @@ def ipex_hijacks(): "torch.instance_norm", lambda orig_func, input, weight, bias, *args, **kwargs: orig_func( input, - weight - if weight is not None - else torch.ones(input.size()[1], device=input.device), - bias - if bias is not None - else torch.zeros(input.size()[1], device=input.device), + ( + weight + if weight is not None + else torch.ones(input.size()[1], device=input.device) + ), + ( + bias + if bias is not None + else torch.zeros(input.size()[1], device=input.device) + ), *args, **kwargs, ), diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py index c423655..f7c89c7 100644 --- a/infer/modules/train/extract_feature_print.py +++ b/infer/modules/train/extract_feature_print.py @@ -116,9 +116,11 @@ else: feats = readwave(wav_path, normalize=saved_cfg.task.normalize) padding_mask = torch.BoolTensor(feats.shape).fill_(False) inputs = { - "source": feats.half().to(device) - if is_half and device not in ["mps", "cpu"] - else feats.to(device), + "source": ( + feats.half().to(device) + if is_half and device not in ["mps", "cpu"] + else feats.to(device) + ), "padding_mask": padding_mask.to(device), "output_layer": 9 if version == "v1" else 12, # layer 9 } diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index 1775012..6f695cc 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ -38,26 +38,28 @@ class VC: to_return_protect0 = { "visible": self.if_f0 != 0, - "value": to_return_protect[0] - if self.if_f0 != 0 and to_return_protect - else 0.5, + "value": ( + to_return_protect[0] if self.if_f0 != 0 and to_return_protect else 0.5 + ), "__type__": "update", } to_return_protect1 = { "visible": self.if_f0 != 0, - "value": to_return_protect[1] - if self.if_f0 != 0 and to_return_protect - else 0.33, + "value": ( + to_return_protect[1] if self.if_f0 != 0 and to_return_protect else 0.33 + ), "__type__": "update", } if sid == "" or sid == []: - if self.hubert_model is not None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 + if ( + self.hubert_model is not None + ): # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 logger.info("Clean model cache") del (self.net_g, self.n_spk, self.hubert_model, self.tgt_sr) # ,cpt - self.hubert_model = ( - self.net_g - ) = self.n_spk = self.hubert_model = self.tgt_sr = None + self.hubert_model = self.net_g = self.n_spk = self.hubert_model = ( + self.tgt_sr + ) = None if torch.cuda.is_available(): torch.cuda.empty_cache() ###楼下不这么折腾清理不干净 diff --git a/tools/app.py b/tools/app.py index 583b6ef..26901e2 100644 --- a/tools/app.py +++ b/tools/app.py @@ -59,12 +59,18 @@ with app: ) sid.change(fn=vc.get_vc, inputs=[sid], outputs=[spk_item]) gr.Markdown( - value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ") + value=i18n( + "男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. " + ) ) vc_input3 = gr.Audio(label="上传音频(长度小于90秒)") - vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0) + vc_transform0 = gr.Number( + label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 + ) f0method0 = gr.Radio( - label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"), + label=i18n( + "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU" + ), choices=["pm", "harvest", "crepe", "rmvpe"], value="pm", interactive=True, @@ -72,7 +78,9 @@ with app: filter_radius0 = gr.Slider( minimum=0, maximum=7, - label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), + label=i18n( + ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音" + ), value=3, step=1, interactive=True, @@ -107,19 +115,25 @@ with app: rms_mix_rate0 = gr.Slider( minimum=0, maximum=1, - label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), + label=i18n( + "输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络" + ), value=1, interactive=True, ) protect0 = gr.Slider( minimum=0, maximum=0.5, - label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"), + label=i18n( + "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果" + ), value=0.33, step=0.01, interactive=True, ) - f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")) + f0_file = gr.File( + label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调") + ) but0 = gr.Button(i18n("转换"), variant="primary") vc_output1 = gr.Textbox(label=i18n("输出信息")) vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) diff --git a/tools/infer/infer-pm-index256.py b/tools/infer/infer-pm-index256.py index bf92d1d..d6b3b74 100644 --- a/tools/infer/infer-pm-index256.py +++ b/tools/infer/infer-pm-index256.py @@ -2,6 +2,7 @@ 对源特征进行检索 """ + import os import logging diff --git a/tools/infer/train-index-v2.py b/tools/infer/train-index-v2.py index 70f90a8..47c706c 100644 --- a/tools/infer/train-index-v2.py +++ b/tools/infer/train-index-v2.py @@ -1,6 +1,7 @@ """ 格式:直接cid为自带的index位;aid放不下了,通过字典来查,反正就5w个 """ + import os import traceback import logging diff --git a/tools/infer/train-index.py b/tools/infer/train-index.py index 39d64c4..400adc0 100644 --- a/tools/infer/train-index.py +++ b/tools/infer/train-index.py @@ -1,6 +1,7 @@ """ 格式:直接cid为自带的index位;aid放不下了,通过字典来查,反正就5w个 """ + import os import logging diff --git a/tools/onnx_inference_demo.py b/tools/onnx_inference_demo.py index bd9ef1c..5ba12ae 100644 --- a/tools/onnx_inference_demo.py +++ b/tools/onnx_inference_demo.py @@ -8,7 +8,9 @@ f0_up_key = 0 # 升降调 sid = 0 # 角色ID f0_method = "dio" # F0提取算法 model_path = "ShirohaRVC.onnx" # 模型的完整路径 -vec_name = "vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型 +vec_name = ( + "vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型 +) wav_path = "123.wav" # 输入路径或ByteIO实例 out_path = "out.wav" # 输出路径或ByteIO实例 diff --git a/tools/rvc_for_realtime.py b/tools/rvc_for_realtime.py index 06e7056..9a7399c 100644 --- a/tools/rvc_for_realtime.py +++ b/tools/rvc_for_realtime.py @@ -279,15 +279,17 @@ class RVC: f0 = f0[2:-3] else: f0 = f0[2:] - f0bak[ - part_length * idx // 160 : part_length * idx // 160 + f0.shape[0] - ] = f0 + f0bak[part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]] = ( + f0 + ) f0bak = signal.medfilt(f0bak, 3) f0bak *= pow(2, f0_up_key / 12) return self.get_f0_post(f0bak) def get_f0_crepe(self, x, f0_up_key): - if "privateuseone" in str(self.device): ###不支持dml,cpu又太慢用不成,拿fcpe顶替 + if "privateuseone" in str( + self.device + ): ###不支持dml,cpu又太慢用不成,拿fcpe顶替 return self.get_f0(x, f0_up_key, 1, "fcpe") # printt("using crepe,device:%s"%self.device) f0, pd = torchcrepe.predict( diff --git a/tools/torchgate/__init__.py b/tools/torchgate/__init__.py index b4a1267..8c1b549 100644 --- a/tools/torchgate/__init__.py +++ b/tools/torchgate/__init__.py @@ -9,4 +9,5 @@ torchgate imports all the functions from PyTorch, and in addition provides: TorchGating --- A PyTorch module that applies a spectral gate to an input signal """ + from .torchgate import TorchGate