mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-04-05 04:08:58 +08:00
Stream audio output
This commit is contained in:
parent
87e06f30ec
commit
249c5d1049
@ -20,9 +20,9 @@ def build_standalone_ui():
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app = build_merged_ui()
|
app = build_merged_ui()
|
||||||
if config.iscolab:
|
if config.iscolab:
|
||||||
app.launch(share=True)
|
app.queue().launch(share=True)
|
||||||
else:
|
else:
|
||||||
app.launch(
|
app.queue().launch(
|
||||||
server_name="localhost",
|
server_name="localhost",
|
||||||
inbrowser=not config.noautoopen,
|
inbrowser=not config.noautoopen,
|
||||||
server_port=config.listen_port,
|
server_port=config.listen_port,
|
||||||
|
@ -17,7 +17,7 @@ def build_merged_ui():
|
|||||||
with gr.Tabs():
|
with gr.Tabs():
|
||||||
with gr.TabItem("TTS-to-RVC Pipeline"):
|
with gr.TabItem("TTS-to-RVC Pipeline"):
|
||||||
gr.Markdown("### Generate speech with Spark TTS and convert with RVC")
|
gr.Markdown("### Generate speech with Spark TTS and convert with RVC")
|
||||||
gr.Markdown("*Note: For multi-sentence text, each sentence will be processed separately and then combined.*")
|
gr.Markdown("*Note: For multi-sentence text, each sentence will be processed separately and streamed as it’s ready.*")
|
||||||
|
|
||||||
# TTS Generation Section
|
# TTS Generation Section
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
@ -131,9 +131,9 @@ def build_merged_ui():
|
|||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
vc_output1 = gr.Textbox(label="Output information", lines=10)
|
vc_output1 = gr.Textbox(label="Output information", lines=10)
|
||||||
vc_output2 = gr.Audio(label="Final concatenated audio")
|
vc_output2 = gr.Audio(label="Streaming concatenated audio", autoplay=True)
|
||||||
|
|
||||||
# Connect generate function to button
|
# Connect generate function to button with streaming enabled
|
||||||
generate_with_rvc_button.click(
|
generate_with_rvc_button.click(
|
||||||
generate_and_process_with_rvc,
|
generate_and_process_with_rvc,
|
||||||
inputs=[
|
inputs=[
|
||||||
@ -152,7 +152,7 @@ def build_merged_ui():
|
|||||||
rms_mix_rate0,
|
rms_mix_rate0,
|
||||||
protect0,
|
protect0,
|
||||||
],
|
],
|
||||||
outputs=[vc_output1, vc_output2],
|
outputs=[vc_output1, vc_output2]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Connect modified_get_vc function for dropdown change
|
# Connect modified_get_vc function for dropdown change
|
||||||
|
@ -218,7 +218,7 @@ def generate_and_process_with_rvc(
|
|||||||
resample_sr, rms_mix_rate, protect
|
resample_sr, rms_mix_rate, protect
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Handle combined TTS and RVC processing for multiple sentences and save outputs to TEMP directories
|
Handle combined TTS and RVC processing for multiple sentences and yield outputs as they are processed.
|
||||||
"""
|
"""
|
||||||
# Ensure TEMP directories exist
|
# Ensure TEMP directories exist
|
||||||
os.makedirs("./TEMP/spark", exist_ok=True)
|
os.makedirs("./TEMP/spark", exist_ok=True)
|
||||||
@ -227,7 +227,8 @@ def generate_and_process_with_rvc(
|
|||||||
# Split text into sentences
|
# Split text into sentences
|
||||||
sentences = split_into_sentences(text)
|
sentences = split_into_sentences(text)
|
||||||
if not sentences:
|
if not sentences:
|
||||||
return "No valid text to process.", None
|
yield "No valid text to process.", None
|
||||||
|
return
|
||||||
|
|
||||||
# Get next base fragment number
|
# Get next base fragment number
|
||||||
base_fragment_num = 1
|
base_fragment_num = 1
|
||||||
@ -240,10 +241,12 @@ def generate_and_process_with_rvc(
|
|||||||
prompt_speech = prompt_wav_upload if prompt_wav_upload else prompt_wav_record
|
prompt_speech = prompt_wav_upload if prompt_wav_upload else prompt_wav_record
|
||||||
prompt_text_clean = None if not prompt_text or len(prompt_text) < 2 else prompt_text
|
prompt_text_clean = None if not prompt_text or len(prompt_text) < 2 else prompt_text
|
||||||
|
|
||||||
# Process each sentence
|
|
||||||
results = []
|
|
||||||
info_messages = [f"Processing {len(sentences)} sentences..."]
|
info_messages = [f"Processing {len(sentences)} sentences..."]
|
||||||
|
results = []
|
||||||
|
|
||||||
|
# Yield initial message with no audio yet
|
||||||
|
yield "\n".join(info_messages), None
|
||||||
|
|
||||||
for i, sentence in enumerate(sentences):
|
for i, sentence in enumerate(sentences):
|
||||||
spark_path, rvc_path, success, info = process_single_sentence(
|
spark_path, rvc_path, success, info = process_single_sentence(
|
||||||
i, sentence, prompt_speech, prompt_text_clean,
|
i, sentence, prompt_speech, prompt_text_clean,
|
||||||
@ -256,22 +259,22 @@ def generate_and_process_with_rvc(
|
|||||||
info_messages.append(info)
|
info_messages.append(info)
|
||||||
if success and rvc_path:
|
if success and rvc_path:
|
||||||
results.append(rvc_path)
|
results.append(rvc_path)
|
||||||
|
|
||||||
# If no sentences were successfully processed
|
# Build partial concatenation from results so far if any fragment exists
|
||||||
if not results:
|
if results:
|
||||||
return "\n".join(info_messages) + "\n\nNo sentences were successfully processed.", None
|
partial_output_path = f"./TEMP/partial_output_{base_fragment_num}.wav"
|
||||||
|
concatenation_success = concatenate_audio_files(results, partial_output_path)
|
||||||
# Concatenate all successful RVC fragments
|
if not concatenation_success:
|
||||||
final_output_path = f"./TEMP/final_output_{base_fragment_num}.wav"
|
# Fallback: use the latest processed fragment
|
||||||
concatenation_success = concatenate_audio_files(results, final_output_path)
|
partial_output_path = results[-1]
|
||||||
|
else:
|
||||||
if concatenation_success:
|
partial_output_path = None
|
||||||
info_messages.append(f"\nAll fragments concatenated successfully to: {final_output_path}")
|
|
||||||
return "\n".join(info_messages), final_output_path
|
# Yield the current info and the partial audio so far
|
||||||
else:
|
yield "\n".join(info_messages), partial_output_path
|
||||||
# If concatenation failed but we have at least one successful fragment, return the first one
|
|
||||||
info_messages.append(f"\nFailed to concatenate fragments. Returning first successful fragment.")
|
# Optionally, yield one final update (could be identical to the last yield)
|
||||||
return "\n".join(info_messages), results[0]
|
yield "\n".join(info_messages), partial_output_path
|
||||||
|
|
||||||
def modified_get_vc(sid0_value, protect0_value, file_index2_component):
|
def modified_get_vc(sid0_value, protect0_value, file_index2_component):
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user