Revert the field and add auto population instead

This commit is contained in:
VSlobolinskyi 2025-03-18 11:01:49 +02:00
parent e658a85ccc
commit a82621dd81
3 changed files with 48 additions and 43 deletions

View File

@ -134,6 +134,20 @@ for name in os.listdir(weight_root):
names.append(name) names.append(name)
index_paths = [] index_paths = []
def update_audio_path(uploaded_file):
    """Return the file path carried by an uploaded-file value.

    Args:
        uploaded_file: The value delivered by a file-upload component. It may
            be ``None``, a list of such values (only the first entry is
            used), a dict holding the path under the ``"name"`` key, an
            object exposing a ``name`` attribute, or any other value, which
            is converted with ``str()``.

    Returns:
        The extracted path as a string, or ``""`` when nothing was uploaded
        (``None``, an empty list, or a list whose first entry is ``None``).
    """
    # If multiple files were allowed, take the first one.
    if isinstance(uploaded_file, list):
        # An empty list means nothing was selected; indexing it would raise
        # IndexError, so treat it the same as "no upload".
        if not uploaded_file:
            return ""
        uploaded_file = uploaded_file[0]
    # Checked after unwrapping the list so that [None] yields "" instead of
    # the literal text "None".
    if uploaded_file is None:
        return ""
    # Depending on Gradio version, the file may be a dict or an object with
    # a 'name' attribute.
    if isinstance(uploaded_file, dict):
        return uploaded_file.get("name", "")
    if hasattr(uploaded_file, "name"):
        return uploaded_file.name
    return str(uploaded_file)
def lookup_indices(index_root): def lookup_indices(index_root):
global index_paths global index_paths
@ -518,16 +532,25 @@ with gr.Blocks(title="RVC WebUI") as app:
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
value=0, value=0,
) )
input_audio0 = gr.File( # Add a file uploader for drag & drop.
label=i18n("输入待处理音频文件路径(默认是正确格式示例)"), audio_upload = gr.File(
label=i18n("拖拽或选择音频文件"),
file_types=[".wav"],
file_count="single", file_count="single",
file_types=[".index"], interactive=True,
interactive=True )
# Existing textbox for the audio file path.
input_audio0 = gr.Textbox(
label=i18n("输入待处理音频文件路径(默认是正确格式示例)"),
placeholder="C:\\Users\\Desktop\\model_example.wav",
interactive=True,
)
# When a file is uploaded, update the textbox.
audio_upload.change(
fn=update_audio_path, inputs=audio_upload, outputs=input_audio0
) )
file_index1 = gr.Textbox( file_index1 = gr.Textbox(
label=i18n( label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
"特征检索库文件路径,为空则使用下拉的选择结果"
),
placeholder="C:\\Users\\Desktop\\model_example.index", placeholder="C:\\Users\\Desktop\\model_example.index",
interactive=True, interactive=True,
) )
@ -540,11 +563,7 @@ with gr.Blocks(title="RVC WebUI") as app:
label=i18n( label=i18n(
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
), ),
choices=( choices=(["pm", "harvest", "crepe", "rmvpe"] if config.dml == False else ["pm", "harvest", "rmvpe"]),
["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"]
),
value="rmvpe", value="rmvpe",
interactive=True, interactive=True,
) )
@ -561,18 +580,14 @@ with gr.Blocks(title="RVC WebUI") as app:
rms_mix_rate0 = gr.Slider( rms_mix_rate0 = gr.Slider(
minimum=0, minimum=0,
maximum=1, maximum=1,
label=i18n( label=i18n("输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"),
"输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络"
),
value=0.25, value=0.25,
interactive=True, interactive=True,
) )
protect0 = gr.Slider( protect0 = gr.Slider(
minimum=0, minimum=0,
maximum=0.5, maximum=0.5,
label=i18n( label=i18n("保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
"保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果"
),
value=0.33, value=0.33,
step=0.01, step=0.01,
interactive=True, interactive=True,
@ -580,9 +595,7 @@ with gr.Blocks(title="RVC WebUI") as app:
filter_radius0 = gr.Slider( filter_radius0 = gr.Slider(
minimum=0, minimum=0,
maximum=7, maximum=7,
label=i18n( label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"
),
value=3, value=3,
step=1, step=1,
interactive=True, interactive=True,
@ -595,9 +608,7 @@ with gr.Blocks(title="RVC WebUI") as app:
interactive=True, interactive=True,
) )
f0_file = gr.File( f0_file = gr.File(
label=i18n( label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"),
"F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"
),
visible=False, visible=False,
) )
@ -612,10 +623,7 @@ with gr.Blocks(title="RVC WebUI") as app:
but0 = gr.Button(i18n("转换"), variant="primary") but0 = gr.Button(i18n("转换"), variant="primary")
with gr.Row(): with gr.Row():
vc_output1 = gr.Textbox(label=i18n("输出信息")) vc_output1 = gr.Textbox(label=i18n("输出信息"))
vc_output2 = gr.Audio( vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
label=i18n("输出音频(右下角三个点,点了可以下载)")
)
but0.click( but0.click(
vc.vc_single, vc.vc_single,
[ [
@ -758,7 +766,6 @@ with gr.Blocks(title="RVC WebUI") as app:
f0method1, f0method1,
file_index3, file_index3,
file_index4, file_index4,
# file_big_npy2,
index_rate2, index_rate2,
filter_radius1, filter_radius1,
resample_sr1, resample_sr1,

View File

@ -146,7 +146,7 @@ class VC:
def vc_single( def vc_single(
self, self,
sid, sid,
audio_file, input_audio_path,
f0_up_key, f0_up_key,
f0_file, f0_file,
f0_method, f0_method,
@ -158,12 +158,11 @@ class VC:
rms_mix_rate, rms_mix_rate,
protect, protect,
): ):
if audio_file is None: if input_audio_path is None:
return "You need to upload an audio", None return "You need to upload an audio", None
f0_up_key = int(f0_up_key) f0_up_key = int(f0_up_key)
try: try:
audio = load_audio(audio_file, 16000) audio = load_audio(input_audio_path, 16000)
audio_max = np.abs(audio).max() / 0.95 audio_max = np.abs(audio).max() / 0.95
if audio_max > 1: if audio_max > 1:
audio /= audio_max audio /= audio_max
@ -184,14 +183,14 @@ class VC:
elif file_index2: elif file_index2:
file_index = file_index2 file_index = file_index2
else: else:
file_index = "" # Prevents potential mistakes by auto-replacing to an empty string file_index = "" # 防止小白写错,自动帮他替换掉
audio_opt = self.pipeline.pipeline( audio_opt = self.pipeline.pipeline(
self.hubert_model, self.hubert_model,
self.net_g, self.net_g,
sid, sid,
audio, audio,
audio_file, input_audio_path,
times, times,
f0_up_key, f0_up_key,
f0_method, f0_method,
@ -268,7 +267,6 @@ class VC:
f0_method, f0_method,
file_index, file_index,
file_index2, file_index2,
# file_big_npy,
index_rate, index_rate,
filter_radius, filter_radius,
resample_sr, resample_sr,

View File

@ -27,8 +27,8 @@ input_audio_path2wav = {}
@lru_cache @lru_cache
def cache_harvest_f0(audio_file, fs, f0max, f0min, frame_period): def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
audio = input_audio_path2wav[audio_file] audio = input_audio_path2wav[input_audio_path]
f0, t = pyworld.harvest( f0, t = pyworld.harvest(
audio, audio,
fs=fs, fs=fs,
@ -83,7 +83,7 @@ class Pipeline(object):
def get_f0( def get_f0(
self, self,
audio_file, input_audio_path,
x, x,
p_len, p_len,
f0_up_key, f0_up_key,
@ -114,8 +114,8 @@ class Pipeline(object):
f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
) )
elif f0_method == "harvest": elif f0_method == "harvest":
input_audio_path2wav[audio_file] = x.astype(np.double) input_audio_path2wav[input_audio_path] = x.astype(np.double)
f0 = cache_harvest_f0(audio_file, self.sr, f0_max, f0_min, 10) f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
if filter_radius > 2: if filter_radius > 2:
f0 = signal.medfilt(f0, 3) f0 = signal.medfilt(f0, 3)
elif f0_method == "crepe": elif f0_method == "crepe":
@ -284,7 +284,7 @@ class Pipeline(object):
net_g, net_g,
sid, sid,
audio, audio,
audio_file, input_audio_path,
times, times,
f0_up_key, f0_up_key,
f0_method, f0_method,
@ -352,7 +352,7 @@ class Pipeline(object):
pitch, pitchf = None, None pitch, pitchf = None, None
if if_f0 == 1: if if_f0 == 1:
pitch, pitchf = self.get_f0( pitch, pitchf = self.get_f0(
audio_file, input_audio_path,
audio_pad, audio_pad,
p_len, p_len,
f0_up_key, f0_up_key,