mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-04-05 04:08:58 +08:00
Stream audio output
This commit is contained in:
parent
87e06f30ec
commit
249c5d1049
@ -20,9 +20,9 @@ def build_standalone_ui():
|
||||
if __name__ == "__main__":
|
||||
app = build_merged_ui()
|
||||
if config.iscolab:
|
||||
app.launch(share=True)
|
||||
app.queue().launch(share=True)
|
||||
else:
|
||||
app.launch(
|
||||
app.queue().launch(
|
||||
server_name="localhost",
|
||||
inbrowser=not config.noautoopen,
|
||||
server_port=config.listen_port,
|
||||
|
@ -17,7 +17,7 @@ def build_merged_ui():
|
||||
with gr.Tabs():
|
||||
with gr.TabItem("TTS-to-RVC Pipeline"):
|
||||
gr.Markdown("### Generate speech with Spark TTS and convert with RVC")
|
||||
gr.Markdown("*Note: For multi-sentence text, each sentence will be processed separately and then combined.*")
|
||||
gr.Markdown("*Note: For multi-sentence text, each sentence will be processed separately and streamed as it’s ready.*")
|
||||
|
||||
# TTS Generation Section
|
||||
with gr.Row():
|
||||
@ -131,9 +131,9 @@ def build_merged_ui():
|
||||
|
||||
with gr.Row():
|
||||
vc_output1 = gr.Textbox(label="Output information", lines=10)
|
||||
vc_output2 = gr.Audio(label="Final concatenated audio")
|
||||
vc_output2 = gr.Audio(label="Streaming concatenated audio", autoplay=True)
|
||||
|
||||
# Connect generate function to button
|
||||
# Connect generate function to button with streaming enabled
|
||||
generate_with_rvc_button.click(
|
||||
generate_and_process_with_rvc,
|
||||
inputs=[
|
||||
@ -152,7 +152,7 @@ def build_merged_ui():
|
||||
rms_mix_rate0,
|
||||
protect0,
|
||||
],
|
||||
outputs=[vc_output1, vc_output2],
|
||||
outputs=[vc_output1, vc_output2]
|
||||
)
|
||||
|
||||
# Connect modified_get_vc function for dropdown change
|
||||
|
@ -218,7 +218,7 @@ def generate_and_process_with_rvc(
|
||||
resample_sr, rms_mix_rate, protect
|
||||
):
|
||||
"""
|
||||
Handle combined TTS and RVC processing for multiple sentences and save outputs to TEMP directories
|
||||
Handle combined TTS and RVC processing for multiple sentences and yield outputs as they are processed.
|
||||
"""
|
||||
# Ensure TEMP directories exist
|
||||
os.makedirs("./TEMP/spark", exist_ok=True)
|
||||
@ -227,7 +227,8 @@ def generate_and_process_with_rvc(
|
||||
# Split text into sentences
|
||||
sentences = split_into_sentences(text)
|
||||
if not sentences:
|
||||
return "No valid text to process.", None
|
||||
yield "No valid text to process.", None
|
||||
return
|
||||
|
||||
# Get next base fragment number
|
||||
base_fragment_num = 1
|
||||
@ -240,9 +241,11 @@ def generate_and_process_with_rvc(
|
||||
prompt_speech = prompt_wav_upload if prompt_wav_upload else prompt_wav_record
|
||||
prompt_text_clean = None if not prompt_text or len(prompt_text) < 2 else prompt_text
|
||||
|
||||
# Process each sentence
|
||||
results = []
|
||||
info_messages = [f"Processing {len(sentences)} sentences..."]
|
||||
results = []
|
||||
|
||||
# Yield initial message with no audio yet
|
||||
yield "\n".join(info_messages), None
|
||||
|
||||
for i, sentence in enumerate(sentences):
|
||||
spark_path, rvc_path, success, info = process_single_sentence(
|
||||
@ -257,21 +260,21 @@ def generate_and_process_with_rvc(
|
||||
if success and rvc_path:
|
||||
results.append(rvc_path)
|
||||
|
||||
# If no sentences were successfully processed
|
||||
if not results:
|
||||
return "\n".join(info_messages) + "\n\nNo sentences were successfully processed.", None
|
||||
|
||||
# Concatenate all successful RVC fragments
|
||||
final_output_path = f"./TEMP/final_output_{base_fragment_num}.wav"
|
||||
concatenation_success = concatenate_audio_files(results, final_output_path)
|
||||
|
||||
if concatenation_success:
|
||||
info_messages.append(f"\nAll fragments concatenated successfully to: {final_output_path}")
|
||||
return "\n".join(info_messages), final_output_path
|
||||
# Build partial concatenation from results so far if any fragment exists
|
||||
if results:
|
||||
partial_output_path = f"./TEMP/partial_output_{base_fragment_num}.wav"
|
||||
concatenation_success = concatenate_audio_files(results, partial_output_path)
|
||||
if not concatenation_success:
|
||||
# Fallback: use the latest processed fragment
|
||||
partial_output_path = results[-1]
|
||||
else:
|
||||
# If concatenation failed but we have at least one successful fragment, return the first one
|
||||
info_messages.append(f"\nFailed to concatenate fragments. Returning first successful fragment.")
|
||||
return "\n".join(info_messages), results[0]
|
||||
partial_output_path = None
|
||||
|
||||
# Yield the current info and the partial audio so far
|
||||
yield "\n".join(info_messages), partial_output_path
|
||||
|
||||
# Optionally, yield one final update (could be identical to the last yield)
|
||||
yield "\n".join(info_messages), partial_output_path
|
||||
|
||||
def modified_get_vc(sid0_value, protect0_value, file_index2_component):
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user