From 7bd25c4623a1cc9cc721b7de4f66cc44d9ae234c Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Sun, 28 May 2023 23:40:54 +0800 Subject: [PATCH] Add files via upload --- MDXNet.py | 10 +++++----- infer-web.py | 35 +++++++++++++++++++++++++---------- infer_uvr5.py | 34 +++++++++++++++------------------- requirements.txt | 2 +- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/MDXNet.py b/MDXNet.py index 02b37f7..6e6191b 100644 --- a/MDXNet.py +++ b/MDXNet.py @@ -141,7 +141,7 @@ class Predictor: # del self.model progress_bar.close() return _sources - def prediction(self, m,vocal_root,others_root): + def prediction(self, m,vocal_root,others_root,format): os.makedirs(vocal_root,exist_ok=True) os.makedirs(others_root,exist_ok=True) basename = os.path.basename(m) @@ -151,8 +151,8 @@ class Predictor: mix = mix.T sources = self.demix(mix.T) opt=sources[0].T - sf.write("%s/%s_main_vocal.wav"%(vocal_root,basename), mix-opt, rate) - sf.write("%s/%s_others.wav"%(others_root,basename), opt , rate) + sf.write("%s/%s_main_vocal.%s"%(vocal_root,basename,format), mix-opt, rate) + sf.write("%s/%s_others.%s"%(others_root,basename,format), opt , rate) class MDXNetDereverb(): def __init__(self,chunks): @@ -167,8 +167,8 @@ class MDXNetDereverb(): self.denoise=True self.pred=Predictor(self) - def _path_audio_(self,input,vocal_root,others_root): - self.pred.prediction(input,vocal_root,others_root) + def _path_audio_(self,input,vocal_root,others_root,format): + self.pred.prediction(input,vocal_root,others_root,format) if __name__ == '__main__': dereverb=MDXNetDereverb(15) diff --git a/infer-web.py b/infer-web.py index 8596da6..3e8eeb8 100644 --- a/infer-web.py +++ b/infer-web.py @@ -77,7 +77,7 @@ from infer_pack.models import ( SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono, ) -from scipy.io import wavfile +import soundfile as sf from fairseq import checkpoint_utils import gradio as gr import logging @@ -235,7 +235,8 @@ def vc_multi( filter_radius, resample_sr, rms_mix_rate, - protect + protect, + format1 ): try: dir_path = ( @@ -271,8 +272,8 @@ def vc_multi( if "Success" in info: try: tgt_sr, audio_opt = opt - wavfile.write( - "%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt + sf.write( + "%s/%s.%s" % (opt_root, os.path.basename(path),format1), audio_opt,tgt_sr ) except: info += traceback.format_exc() @@ -283,7 +284,7 @@ def vc_multi( yield traceback.format_exc() -def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg): +def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg,format0): infos = [] try: inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") @@ -318,7 +319,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg): and info["streams"][0]["sample_rate"] == "44100" ): need_reformat = 0 - pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal) + pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal,format0) done = 1 except: need_reformat = 1 @@ -332,7 +333,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg): inp_path = tmp_path try: if done == 0: - pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal) + pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal,format0) infos.append("%s->Success" % (os.path.basename(inp_path))) yield "\n".join(infos) except: @@ -1341,6 +1342,12 @@ with gr.Blocks() as app: file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") ) with gr.Row(): + format1= gr.Radio( + label=i18n("导出文件格式"), + choices=["wav", "flac","mp3","m4a"], + value="flac", + interactive=True, + ) but1 = gr.Button(i18n("转换"), variant="primary") vc_output3 = gr.Textbox(label=i18n("输出信息")) but1.click( @@ -1359,7 +1366,8 @@ with gr.Blocks() as app: filter_radius1, resample_sr1, rms_mix_rate1, - protect1 + protect1, + format1 ], [vc_output3], ) @@ -1402,9 +1410,15 @@ with gr.Blocks() as app: visible=False, # 先不开放调整 ) opt_vocal_root = gr.Textbox( - label=i18n("指定输出人声文件夹"), value="opt" + label=i18n("指定输出主人声文件夹"), value="opt" + ) + opt_ins_root = gr.Textbox(label=i18n("指定输出非主人声文件夹"), value="opt") + format0= gr.Radio( + label=i18n("导出文件格式"), + choices=["wav", "flac","mp3","m4a"], + value="flac", + interactive=True, ) - opt_ins_root = gr.Textbox(label=i18n("指定输出乐器文件夹"), value="opt") but2 = gr.Button(i18n("转换"), variant="primary") vc_output4 = gr.Textbox(label=i18n("输出信息")) but2.click( @@ -1416,6 +1430,7 @@ with gr.Blocks() as app: wav_inputs, opt_ins_root, agg, + format0 ], [vc_output4], ) diff --git a/infer_uvr5.py b/infer_uvr5.py index 4948d17..614bba1 100644 --- a/infer_uvr5.py +++ b/infer_uvr5.py @@ -11,7 +11,7 @@ from tqdm import tqdm from uvr5_pack.lib_v5 import spec_utils from uvr5_pack.utils import _get_name_params, inference from uvr5_pack.lib_v5.model_param_init import ModelParameters -from scipy.io import wavfile +import soundfile as sf from uvr5_pack.lib_v5.nets_new import CascadedNet from uvr5_pack.lib_v5 import nets_61968KB as nets @@ -41,7 +41,7 @@ class _audio_pre_: self.mp = mp self.model = model - def _path_audio_(self, music_file, ins_root=None, vocal_root=None): + def _path_audio_(self, music_file, ins_root=None, vocal_root=None,format="flac"): if ins_root is None and vocal_root is None: return "No save root." name = os.path.basename(music_file) @@ -120,12 +120,11 @@ class _audio_pre_: else: wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) print("%s instruments done" % name) - wavfile.write( + sf.write( os.path.join( - ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"]) + ins_root, "instrument_{}_{}.{}".format(name, self.data["agg"],format) ), - self.mp.param["sr"], - (np.array(wav_instrument) * 32768).astype("int16"), + (np.array(wav_instrument) * 32768).astype("int16"), self.mp.param["sr"], ) # if vocal_root is not None: if self.data["high_end_process"].startswith("mirroring"): @@ -138,12 +137,11 @@ class _audio_pre_: else: wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) print("%s vocals done" % name) - wavfile.write( + sf.write( os.path.join( - vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"]) + vocal_root, "vocal_{}_{}.{}".format(name, self.data["agg"],format) ), - self.mp.param["sr"], - (np.array(wav_vocals) * 32768).astype("int16"), + (np.array(wav_vocals) * 32768).astype("int16"), self.mp.param["sr"], ) class _audio_pre_new: @@ -173,7 +171,7 @@ class _audio_pre_new: self.mp = mp self.model = model - def _path_audio_(self, music_file, vocal_root=None, ins_root=None):#3个VR模型vocal和ins是反的 + def _path_audio_(self, music_file, vocal_root=None, ins_root=None,format="flac"):#3个VR模型vocal和ins是反的 if ins_root is None and vocal_root is None: return "No save root." name = os.path.basename(music_file) @@ -252,12 +250,11 @@ class _audio_pre_new: else: wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) print("%s instruments done" % name) - wavfile.write( + sf.write( os.path.join( - ins_root, "main_vocal_{}_{}.wav".format(name, self.data["agg"]) + ins_root, "main_vocal_{}_{}.{}".format(name, self.data["agg"],format) ), - self.mp.param["sr"], - (np.array(wav_instrument) * 32768).astype("int16"), + (np.array(wav_instrument) * 32768).astype("int16"),self.mp.param["sr"], ) # if vocal_root is not None: if self.data["high_end_process"].startswith("mirroring"): @@ -270,12 +267,11 @@ class _audio_pre_new: else: wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) print("%s vocals done" % name) - wavfile.write( + sf.write( os.path.join( - vocal_root, "others_{}_{}.wav".format(name, self.data["agg"]) + vocal_root, "others_{}_{}.{}".format(name, self.data["agg"],format) ), - self.mp.param["sr"], - (np.array(wav_vocals) * 32768).astype("int16"), + (np.array(wav_vocals) * 32768).astype("int16"),self.mp.param["sr"], ) diff --git a/requirements.txt b/requirements.txt index 6004c1e..9ccdc96 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,4 +40,4 @@ colorama>=0.4.5 pyworld>=0.3.2 httpx==0.23.0 onnxruntime-gpu -torchcrepe +torchcrepe \ No newline at end of file