Merge pull request #1769 from RVC-Project/formatter-dev

chore(format): run black on dev
RVC-Boss authored 2024-01-26 16:10:23 +08:00, committed by GitHub
commit c09d1bcfac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 266 additions and 148 deletions
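Every hunk below is a mechanical reformatting with no intended change in behavior. Judging by the commit date and the recurring patterns (an assumption; the commit itself does not record the formatter version or flags), the output matches the Black 24.x stable style as produced by running `black .` over the repository: multi-line conditional expressions gain wrapping parentheses, and over-long subscript assignments keep the target on one line and parenthesize the right-hand side. A minimal sketch of the recurring pattern, with hypothetical names:

    # Older Black style: when a statement exceeds the line limit,
    # the subscript target itself may be split across lines.
    buf[
        start:end
    ] = compute(first_argument, second_argument)

    # Newer style: the target stays intact and the right-hand
    # side is wrapped in parentheses instead.
    buf[start:end] = (
        compute(first_argument, second_argument)
    )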

View File

@@ -877,11 +877,11 @@ if __name__ == "__main__":
                 self.input_wav_denoise[-self.block_frame - 2 * self.zc :]
             )[160:]
         else:
-            self.input_wav_res[
-                -160 * (indata.shape[0] // self.zc + 1) :
-            ] = self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[
-                160:
-            ]
+            self.input_wav_res[-160 * (indata.shape[0] // self.zc + 1) :] = (
+                self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[
+                    160:
+                ]
+            )
         # infer
         if self.function == "vc":
             infer_wav = self.rvc.infer(

View File

@@ -93,7 +93,9 @@ if torch.cuda.is_available() or ngpu != 0:
                 "90",
                 "M4",
                 "T4",
-                "TITAN","4060", "L",
+                "TITAN",
+                "4060",
+                "L",
                 "6000",
             ]
         ):
@@ -415,12 +417,16 @@ def get_pretrained_models(path_str, f0_str, sr2):
             sr2,
         )
     return (
-        "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
-        if if_pretrained_generator_exist
-        else "",
-        "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
-        if if_pretrained_discriminator_exist
-        else "",
+        (
+            "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
+            if if_pretrained_generator_exist
+            else ""
+        ),
+        (
+            "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
+            if if_pretrained_discriminator_exist
+            else ""
+        ),
     )
@@ -762,7 +768,9 @@ def train1key(
         if_save_every_weights18,
         version19,
     )
-    yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
+    yield get_info_str(
+        i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")
+    )

     # step3b:训练索引
     [get_info_str(_) for _ in train_index(exp_dir1, version19)]
@@ -809,7 +817,9 @@ with gr.Blocks(title="RVC WebUI") as app:
             with gr.Row():
                 sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
                 with gr.Column():
-                    refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary")
+                    refresh_button = gr.Button(
+                        i18n("刷新音色列表和索引路径"), variant="primary"
+                    )
                     clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
                     spk_item = gr.Slider(
                         minimum=0,
@@ -828,14 +838,19 @@ with gr.Blocks(title="RVC WebUI") as app:
             with gr.Row():
                 with gr.Column():
                     vc_transform0 = gr.Number(
-                        label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
+                        label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
+                        value=0,
                     )
                     input_audio0 = gr.Textbox(
-                        label=i18n("输入待处理音频文件路径(默认是正确格式示例)"),
+                        label=i18n(
+                            "输入待处理音频文件路径(默认是正确格式示例)"
+                        ),
                         placeholder="C:\\Users\\Desktop\\audio_example.wav",
                     )
                     file_index1 = gr.Textbox(
-                        label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
+                        label=i18n(
+                            "特征检索库文件路径,为空则使用下拉的选择结果"
+                        ),
                         placeholder="C:\\Users\\Desktop\\model_example.index",
                         interactive=True,
                     )
@@ -848,9 +863,11 @@ with gr.Blocks(title="RVC WebUI") as app:
                         label=i18n(
                             "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
                         ),
-                        choices=["pm", "harvest", "crepe", "rmvpe"]
-                        if config.dml == False
-                        else ["pm", "harvest", "rmvpe"],
+                        choices=(
+                            ["pm", "harvest", "crepe", "rmvpe"]
+                            if config.dml == False
+                            else ["pm", "harvest", "rmvpe"]
+                        ),
                         value="rmvpe",
                         interactive=True,
                     )
@@ -867,7 +884,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                     rms_mix_rate0 = gr.Slider(
                         minimum=0,
                         maximum=1,
-                        label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
+                        label=i18n(
+                            "输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"
+                        ),
                         value=0.25,
                         interactive=True,
                     )
@@ -899,7 +918,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                         interactive=True,
                     )
                     f0_file = gr.File(
-                        label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"),
+                        label=i18n(
+                            "F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"
+                        ),
                         visible=False,
                     )
@@ -919,7 +940,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                     but0 = gr.Button(i18n("转换"), variant="primary")
                     with gr.Row():
                         vc_output1 = gr.Textbox(label=i18n("输出信息"))
-                        vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
+                        vc_output2 = gr.Audio(
+                            label=i18n("输出音频(右下角三个点,点了可以下载)")
+                        )
                     but0.click(
                         vc.vc_single,
@@ -943,14 +966,19 @@ with gr.Blocks(title="RVC WebUI") as app:
                 )
         with gr.TabItem(i18n("批量推理")):
             gr.Markdown(
-                value=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ")
+                value=i18n(
+                    "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. "
+                )
             )
             with gr.Row():
                 with gr.Column():
                     vc_transform1 = gr.Number(
-                        label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
-                    )
-                    opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
+                        label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
+                        value=0,
+                    )
+                    opt_input = gr.Textbox(
+                        label=i18n("指定输出文件夹"), value="opt"
+                    )
                     file_index3 = gr.Textbox(
                         label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
                         value="",
@@ -965,9 +993,11 @@ with gr.Blocks(title="RVC WebUI") as app:
                         label=i18n(
                             "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
                         ),
-                        choices=["pm", "harvest", "crepe", "rmvpe"]
-                        if config.dml == False
-                        else ["pm", "harvest", "rmvpe"],
+                        choices=(
+                            ["pm", "harvest", "crepe", "rmvpe"]
+                            if config.dml == False
+                            else ["pm", "harvest", "rmvpe"]
+                        ),
                         value="rmvpe",
                         interactive=True,
                     )
@@ -1002,7 +1032,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                     rms_mix_rate1 = gr.Slider(
                         minimum=0,
                         maximum=1,
-                        label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
+                        label=i18n(
+                            "输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"
+                        ),
                         value=1,
                         interactive=True,
                     )
@@ -1019,7 +1051,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                     filter_radius1 = gr.Slider(
                         minimum=0,
                         maximum=7,
-                        label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
+                        label=i18n(
+                            ">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"
+                        ),
                         value=3,
                         step=1,
                         interactive=True,
@@ -1033,11 +1067,14 @@ with gr.Blocks(title="RVC WebUI") as app:
                     )
             with gr.Row():
                 dir_input = gr.Textbox(
-                    label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
+                    label=i18n(
+                        "输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"
+                    ),
                     placeholder="C:\\Users\\Desktop\\input_vocal_dir",
                 )
                 inputs = gr.File(
-                    file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
+                    file_count="multiple",
+                    label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
                 )

             with gr.Row():
@@ -1086,10 +1123,13 @@ with gr.Blocks(title="RVC WebUI") as app:
                         placeholder="C:\\Users\\Desktop\\todo-songs",
                     )
                     wav_inputs = gr.File(
-                        file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
+                        file_count="multiple",
+                        label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
                     )
                 with gr.Column():
-                    model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
+                    model_choose = gr.Dropdown(
+                        label=i18n("模型"), choices=uvr5_names
+                    )
                     agg = gr.Slider(
                         minimum=0,
                         maximum=20,
@@ -1170,7 +1210,8 @@ with gr.Blocks(title="RVC WebUI") as app:
                 )
             with gr.Row():
                 trainset_dir4 = gr.Textbox(
-                    label=i18n("输入训练文件夹路径"), value=i18n("E:\\语音音频+标注\\米津玄师\\src")
+                    label=i18n("输入训练文件夹路径"),
+                    value=i18n("E:\\语音音频+标注\\米津玄师\\src"),
                 )
                 spk_id5 = gr.Slider(
                     minimum=0,
@@ -1189,11 +1230,17 @@ with gr.Blocks(title="RVC WebUI") as app:
                 api_name="train_preprocess",
             )
         with gr.Group():
-            gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
+            gr.Markdown(
+                value=i18n(
+                    "step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"
+                )
+            )
             with gr.Row():
                 with gr.Column():
                     gpus6 = gr.Textbox(
-                        label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
+                        label=i18n(
+                            "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
+                        ),
                         value=gpus,
                         interactive=True,
                         visible=F0GPUVisible,
@@ -1281,7 +1328,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                         interactive=True,
                     )
                     if_save_every_weights18 = gr.Radio(
-                        label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
+                        label=i18n(
+                            "是否在每次保存时间点将最终小模型保存至weights文件夹"
+                        ),
                         choices=[i18n("是"), i18n("否")],
                         value=i18n("是"),
                         interactive=True,
@@ -1313,7 +1362,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                 [f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15],
             )
             gpus16 = gr.Textbox(
-                label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
+                label=i18n(
+                    "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
+                ),
                 value=gpus,
                 interactive=True,
             )
@@ -1373,8 +1424,12 @@ with gr.Blocks(title="RVC WebUI") as app:
         with gr.Group():
             gr.Markdown(value=i18n("模型融合, 可用于测试音色融合"))
             with gr.Row():
-                ckpt_a = gr.Textbox(label=i18n("A模型路径"), value="", interactive=True)
-                ckpt_b = gr.Textbox(label=i18n("B模型路径"), value="", interactive=True)
+                ckpt_a = gr.Textbox(
+                    label=i18n("A模型路径"), value="", interactive=True
+                )
+                ckpt_b = gr.Textbox(
+                    label=i18n("B模型路径"), value="", interactive=True
+                )
                 alpha_a = gr.Slider(
                     minimum=0,
                     maximum=1,
@@ -1396,7 +1451,10 @@ with gr.Blocks(title="RVC WebUI") as app:
                     interactive=True,
                 )
                 info__ = gr.Textbox(
-                    label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
+                    label=i18n("要置入的模型信息"),
+                    value="",
+                    max_lines=8,
+                    interactive=True,
                 )
                 name_to_save0 = gr.Textbox(
                     label=i18n("保存的模型名不带后缀"),
@@ -1429,13 +1487,18 @@ with gr.Blocks(title="RVC WebUI") as app:
                 api_name="ckpt_merge",
             )  # def merge(path1,path2,alpha1,sr,f0,info):
         with gr.Group():
-            gr.Markdown(value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)"))
+            gr.Markdown(
+                value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)")
+            )
             with gr.Row():
                 ckpt_path0 = gr.Textbox(
                     label=i18n("模型路径"), value="", interactive=True
                 )
                 info_ = gr.Textbox(
-                    label=i18n("要改的模型信息"), value="", max_lines=8, interactive=True
+                    label=i18n("要改的模型信息"),
+                    value="",
+                    max_lines=8,
+                    interactive=True,
                 )
                 name_to_save1 = gr.Textbox(
                     label=i18n("保存的文件名, 默认空为和源文件同名"),
@@ -1453,7 +1516,9 @@ with gr.Blocks(title="RVC WebUI") as app:
                 api_name="ckpt_modify",
             )
         with gr.Group():
-            gr.Markdown(value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)"))
+            gr.Markdown(
+                value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)")
+            )
             with gr.Row():
                 ckpt_path1 = gr.Textbox(
                     label=i18n("模型路径"), value="", interactive=True
@@ -1495,7 +1560,10 @@ with gr.Blocks(title="RVC WebUI") as app:
                     interactive=True,
                 )
                 info___ = gr.Textbox(
-                    label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
+                    label=i18n("要置入的模型信息"),
+                    value="",
+                    max_lines=8,
+                    interactive=True,
                 )
                 but9 = gr.Button(i18n("提取"), variant="primary")
                 info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
@@ -1511,7 +1579,9 @@ with gr.Blocks(title="RVC WebUI") as app:
         with gr.TabItem(i18n("Onnx导出")):
             with gr.Row():
-                ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
+                ckpt_dir = gr.Textbox(
+                    label=i18n("RVC模型路径"), value="", interactive=True
+                )
             with gr.Row():
                 onnx_dir = gr.Textbox(
                     label=i18n("Onnx输出路径"), value="", interactive=True

View File

@@ -400,13 +400,17 @@ class SineGen(torch.nn.Module):
             f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (
                 idx + 2
             )  # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic
-            rad_values = (f0_buf / self.sampling_rate) % 1  ###%1意味着n_har的乘积无法后处理优化
+            rad_values = (
+                f0_buf / self.sampling_rate
+            ) % 1  ###%1意味着n_har的乘积无法后处理优化
             rand_ini = torch.rand(
                 f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
             )
             rand_ini[:, 0] = 0
             rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
-            tmp_over_one = torch.cumsum(rad_values, 1)  # % 1  #####%1意味着后面的cumsum无法再优化
+            tmp_over_one = torch.cumsum(
+                rad_values, 1
+            )  # % 1  #####%1意味着后面的cumsum无法再优化
             tmp_over_one *= upp
             tmp_over_one = F.interpolate(
                 tmp_over_one.transpose(2, 1),

View File

@@ -333,13 +333,17 @@ class SineGen(torch.nn.Module):
             f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (
                 idx + 2
             )  # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic
-            rad_values = (f0_buf / self.sampling_rate) % 1  ###%1意味着n_har的乘积无法后处理优化
+            rad_values = (
+                f0_buf / self.sampling_rate
+            ) % 1  ###%1意味着n_har的乘积无法后处理优化
             rand_ini = torch.rand(
                 f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
             )
             rand_ini[:, 0] = 0
             rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
-            tmp_over_one = torch.cumsum(rad_values, 1)  # % 1  #####%1意味着后面的cumsum无法再优化
+            tmp_over_one = torch.cumsum(
+                rad_values, 1
+            )  # % 1  #####%1意味着后面的cumsum无法再优化
             tmp_over_one *= upp
             tmp_over_one = F.interpolate(
                 tmp_over_one.transpose(2, 1),

View File

@@ -62,12 +62,12 @@ def torch_bmm(input, mat2, *, out=None):
             ):  # pylint: disable=invalid-name
                 start_idx_2 = i2 * split_2_slice_size
                 end_idx_2 = (i2 + 1) * split_2_slice_size
-                hidden_states[
-                    start_idx:end_idx, start_idx_2:end_idx_2
-                ] = original_torch_bmm(
-                    input[start_idx:end_idx, start_idx_2:end_idx_2],
-                    mat2[start_idx:end_idx, start_idx_2:end_idx_2],
-                    out=out,
-                )
+                hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = (
+                    original_torch_bmm(
+                        input[start_idx:end_idx, start_idx_2:end_idx_2],
+                        mat2[start_idx:end_idx, start_idx_2:end_idx_2],
+                        out=out,
+                    )
+                )
         else:
             hidden_states[start_idx:end_idx] = original_torch_bmm(
@@ -138,61 +138,67 @@ def scaled_dot_product_attention(
                 start_idx_2 = i2 * split_2_slice_size
                 end_idx_2 = (i2 + 1) * split_2_slice_size
                 if no_shape_one:
-                    hidden_states[
-                        start_idx:end_idx, start_idx_2:end_idx_2
-                    ] = original_scaled_dot_product_attention(
-                        query[start_idx:end_idx, start_idx_2:end_idx_2],
-                        key[start_idx:end_idx, start_idx_2:end_idx_2],
-                        value[start_idx:end_idx, start_idx_2:end_idx_2],
-                        attn_mask=attn_mask[
-                            start_idx:end_idx, start_idx_2:end_idx_2
-                        ]
-                        if attn_mask is not None
-                        else attn_mask,
-                        dropout_p=dropout_p,
-                        is_causal=is_causal,
-                    )
+                    hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = (
+                        original_scaled_dot_product_attention(
+                            query[start_idx:end_idx, start_idx_2:end_idx_2],
+                            key[start_idx:end_idx, start_idx_2:end_idx_2],
+                            value[start_idx:end_idx, start_idx_2:end_idx_2],
+                            attn_mask=(
+                                attn_mask[start_idx:end_idx, start_idx_2:end_idx_2]
+                                if attn_mask is not None
+                                else attn_mask
+                            ),
+                            dropout_p=dropout_p,
+                            is_causal=is_causal,
+                        )
+                    )
                 else:
-                    hidden_states[
-                        :, start_idx:end_idx, start_idx_2:end_idx_2
-                    ] = original_scaled_dot_product_attention(
-                        query[:, start_idx:end_idx, start_idx_2:end_idx_2],
-                        key[:, start_idx:end_idx, start_idx_2:end_idx_2],
-                        value[:, start_idx:end_idx, start_idx_2:end_idx_2],
-                        attn_mask=attn_mask[
-                            :, start_idx:end_idx, start_idx_2:end_idx_2
-                        ]
-                        if attn_mask is not None
-                        else attn_mask,
-                        dropout_p=dropout_p,
-                        is_causal=is_causal,
-                    )
+                    hidden_states[:, start_idx:end_idx, start_idx_2:end_idx_2] = (
+                        original_scaled_dot_product_attention(
+                            query[:, start_idx:end_idx, start_idx_2:end_idx_2],
+                            key[:, start_idx:end_idx, start_idx_2:end_idx_2],
+                            value[:, start_idx:end_idx, start_idx_2:end_idx_2],
+                            attn_mask=(
+                                attn_mask[
+                                    :, start_idx:end_idx, start_idx_2:end_idx_2
+                                ]
+                                if attn_mask is not None
+                                else attn_mask
+                            ),
+                            dropout_p=dropout_p,
+                            is_causal=is_causal,
+                        )
+                    )
         else:
             if no_shape_one:
-                hidden_states[
-                    start_idx:end_idx
-                ] = original_scaled_dot_product_attention(
-                    query[start_idx:end_idx],
-                    key[start_idx:end_idx],
-                    value[start_idx:end_idx],
-                    attn_mask=attn_mask[start_idx:end_idx]
-                    if attn_mask is not None
-                    else attn_mask,
-                    dropout_p=dropout_p,
-                    is_causal=is_causal,
-                )
+                hidden_states[start_idx:end_idx] = (
+                    original_scaled_dot_product_attention(
+                        query[start_idx:end_idx],
+                        key[start_idx:end_idx],
+                        value[start_idx:end_idx],
+                        attn_mask=(
+                            attn_mask[start_idx:end_idx]
+                            if attn_mask is not None
+                            else attn_mask
+                        ),
+                        dropout_p=dropout_p,
+                        is_causal=is_causal,
+                    )
+                )
             else:
-                hidden_states[
-                    :, start_idx:end_idx
-                ] = original_scaled_dot_product_attention(
-                    query[:, start_idx:end_idx],
-                    key[:, start_idx:end_idx],
-                    value[:, start_idx:end_idx],
-                    attn_mask=attn_mask[:, start_idx:end_idx]
-                    if attn_mask is not None
-                    else attn_mask,
-                    dropout_p=dropout_p,
-                    is_causal=is_causal,
-                )
+                hidden_states[:, start_idx:end_idx] = (
+                    original_scaled_dot_product_attention(
+                        query[:, start_idx:end_idx],
+                        key[:, start_idx:end_idx],
+                        value[:, start_idx:end_idx],
+                        attn_mask=(
+                            attn_mask[:, start_idx:end_idx]
+                            if attn_mask is not None
+                            else attn_mask
+                        ),
+                        dropout_p=dropout_p,
+                        is_causal=is_causal,
+                    )
+                )
     else:
         return original_scaled_dot_product_attention(

View File

@@ -104,11 +104,11 @@ def return_xpu(device):
     return (
         f"xpu:{device[-1]}"
        if isinstance(device, str) and ":" in device
-        else f"xpu:{device}"
-        if isinstance(device, int)
-        else torch.device("xpu")
-        if isinstance(device, torch.device)
-        else "xpu"
+        else (
+            f"xpu:{device}"
+            if isinstance(device, int)
+            else torch.device("xpu") if isinstance(device, torch.device) else "xpu"
+        )
     )
@@ -271,12 +271,16 @@ def ipex_hijacks():
         "torch.batch_norm",
         lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
             input,
-            weight
-            if weight is not None
-            else torch.ones(input.size()[1], device=input.device),
-            bias
-            if bias is not None
-            else torch.zeros(input.size()[1], device=input.device),
+            (
+                weight
+                if weight is not None
+                else torch.ones(input.size()[1], device=input.device)
+            ),
+            (
+                bias
+                if bias is not None
+                else torch.zeros(input.size()[1], device=input.device)
+            ),
             *args,
             **kwargs,
         ),
@@ -286,12 +290,16 @@ def ipex_hijacks():
         "torch.instance_norm",
         lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
             input,
-            weight
-            if weight is not None
-            else torch.ones(input.size()[1], device=input.device),
-            bias
-            if bias is not None
-            else torch.zeros(input.size()[1], device=input.device),
+            (
+                weight
+                if weight is not None
+                else torch.ones(input.size()[1], device=input.device)
+            ),
+            (
+                bias
+                if bias is not None
+                else torch.zeros(input.size()[1], device=input.device)
+            ),
             *args,
             **kwargs,
         ),

View File

@@ -116,9 +116,11 @@ else:
     feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
     padding_mask = torch.BoolTensor(feats.shape).fill_(False)
     inputs = {
-        "source": feats.half().to(device)
-        if is_half and device not in ["mps", "cpu"]
-        else feats.to(device),
+        "source": (
+            feats.half().to(device)
+            if is_half and device not in ["mps", "cpu"]
+            else feats.to(device)
+        ),
         "padding_mask": padding_mask.to(device),
         "output_layer": 9 if version == "v1" else 12,  # layer 9
     }

View File

@@ -38,26 +38,28 @@ class VC:
         to_return_protect0 = {
             "visible": self.if_f0 != 0,
-            "value": to_return_protect[0]
-            if self.if_f0 != 0 and to_return_protect
-            else 0.5,
+            "value": (
+                to_return_protect[0] if self.if_f0 != 0 and to_return_protect else 0.5
+            ),
             "__type__": "update",
         }
         to_return_protect1 = {
             "visible": self.if_f0 != 0,
-            "value": to_return_protect[1]
-            if self.if_f0 != 0 and to_return_protect
-            else 0.33,
+            "value": (
+                to_return_protect[1] if self.if_f0 != 0 and to_return_protect else 0.33
+            ),
             "__type__": "update",
         }
         if sid == "" or sid == []:
-            if self.hubert_model is not None:  # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
+            if (
+                self.hubert_model is not None
+            ):  # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
                 logger.info("Clean model cache")
                 del (self.net_g, self.n_spk, self.hubert_model, self.tgt_sr)  # ,cpt
-                self.hubert_model = (
-                    self.net_g
-                ) = self.n_spk = self.hubert_model = self.tgt_sr = None
+                self.hubert_model = self.net_g = self.n_spk = self.hubert_model = (
+                    self.tgt_sr
+                ) = None
                 if torch.cuda.is_available():
                     torch.cuda.empty_cache()
                 ###楼下不这么折腾清理不干净

View File

@@ -59,12 +59,18 @@ with app:
             )
             sid.change(fn=vc.get_vc, inputs=[sid], outputs=[spk_item])
             gr.Markdown(
-                value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
+                value=i18n(
+                    "男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. "
+                )
             )
             vc_input3 = gr.Audio(label="上传音频长度小于90秒")
-            vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0)
+            vc_transform0 = gr.Number(
+                label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
+            )
             f0method0 = gr.Radio(
-                label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"),
+                label=i18n(
+                    "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
+                ),
                 choices=["pm", "harvest", "crepe", "rmvpe"],
                 value="pm",
                 interactive=True,
@@ -72,7 +78,9 @@ with app:
             filter_radius0 = gr.Slider(
                 minimum=0,
                 maximum=7,
-                label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
+                label=i18n(
+                    ">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"
+                ),
                 value=3,
                 step=1,
                 interactive=True,
@@ -107,19 +115,25 @@ with app:
             rms_mix_rate0 = gr.Slider(
                 minimum=0,
                 maximum=1,
-                label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
+                label=i18n(
+                    "输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"
+                ),
                 value=1,
                 interactive=True,
             )
             protect0 = gr.Slider(
                 minimum=0,
                 maximum=0.5,
-                label=i18n("保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
+                label=i18n(
+                    "保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果"
+                ),
                 value=0.33,
                 step=0.01,
                 interactive=True,
             )
-            f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
+            f0_file = gr.File(
+                label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")
+            )
             but0 = gr.Button(i18n("转换"), variant="primary")
             vc_output1 = gr.Textbox(label=i18n("输出信息"))
             vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))

View File

@@ -2,6 +2,7 @@
 对源特征进行检索
 """
+
 import os
 import logging

View File

@@ -1,6 +1,7 @@
 """
 格式直接cid为自带的index位aid放不下了通过字典来查反正就5w个
 """
+
 import os
 import traceback
 import logging

View File

@@ -1,6 +1,7 @@
 """
 格式直接cid为自带的index位aid放不下了通过字典来查反正就5w个
 """
+
 import os
 import logging

View File

@@ -8,7 +8,9 @@ f0_up_key = 0  # 升降调
 sid = 0  # 角色ID
 f0_method = "dio"  # F0提取算法
 model_path = "ShirohaRVC.onnx"  # 模型的完整路径
-vec_name = "vec-256-layer-9"  # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
+vec_name = (
+    "vec-256-layer-9"  # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
+)
 wav_path = "123.wav"  # 输入路径或ByteIO实例
 out_path = "out.wav"  # 输出路径或ByteIO实例

View File

@@ -279,15 +279,17 @@ class RVC:
                 f0 = f0[2:-3]
             else:
                 f0 = f0[2:]
-            f0bak[
-                part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]
-            ] = f0
+            f0bak[part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]] = (
+                f0
+            )
         f0bak = signal.medfilt(f0bak, 3)
         f0bak *= pow(2, f0_up_key / 12)
         return self.get_f0_post(f0bak)

     def get_f0_crepe(self, x, f0_up_key):
-        if "privateuseone" in str(self.device):  ###不支持dmlcpu又太慢用不成拿fcpe顶替
+        if "privateuseone" in str(
+            self.device
+        ):  ###不支持dmlcpu又太慢用不成拿fcpe顶替
             return self.get_f0(x, f0_up_key, 1, "fcpe")
         # printt("using crepe,device:%s"%self.device)
         f0, pd = torchcrepe.predict(

View File

@ -9,4 +9,5 @@ torchgate imports all the functions from PyTorch, and in addition provides:
TorchGating --- A PyTorch module that applies a spectral gate to an input signal TorchGating --- A PyTorch module that applies a spectral gate to an input signal
""" """
from .torchgate import TorchGate from .torchgate import TorchGate