From 9ff976b155bee6651088caa71ffede3228effb97 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 6 Jun 2023 22:32:10 +0800 Subject: [PATCH] Add files via upload --- MDXNet.py | 30 ++++++++++--- infer-web.py | 27 ++++++++--- infer_uvr5.py | 122 +++++++++++++++++++++++++++++++++++++------------- 3 files changed, 134 insertions(+), 45 deletions(-) diff --git a/MDXNet.py b/MDXNet.py index 7a1445a..d244f8a 100644 --- a/MDXNet.py +++ b/MDXNet.py @@ -86,7 +86,12 @@ def get_models(device, dim_f, dim_t, n_fft): warnings.filterwarnings("ignore") cpu = torch.device("cpu") -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +if torch.cuda.is_available(): + device = torch.device("cuda:0") +elif torch.backends.mps.is_available(): + device = torch.device("mps") +else: + device = torch.device("cpu") class Predictor: @@ -201,11 +206,24 @@ class Predictor: mix = mix.T sources = self.demix(mix.T) opt = sources[0].T - sf.write( - "%s/%s_main_vocal.%s" % (vocal_root, basename, format), mix - opt, rate - ) - sf.write("%s/%s_others.%s" % (others_root, basename, format), opt, rate) - + if(format in ["wav", "flac"]): + sf.write("%s/%s_main_vocal.%s" % (vocal_root, basename, format), mix - opt, rate) + sf.write("%s/%s_others.%s" % (others_root, basename, format), opt, rate) + else: + path_vocal="%s/%s_main_vocal.wav" % (vocal_root, basename) + path_other="%s/%s_others.wav" % (others_root, basename) + sf.write(path_vocal, mix - opt, rate) + sf.write(path_other, opt, rate) + if (os.path.exists(path_vocal)): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path_vocal, path_vocal[:-4] + ".%s" % format) + ) + if (os.path.exists(path_other)): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path_other, path_other[:-4] + ".%s" % format) + ) class MDXNetDereverb: def __init__(self, chunks): diff --git a/infer-web.py b/infer-web.py index a9497ca..1d2f713 100644 --- a/infer-web.py +++ b/infer-web.py @@ -272,11 +272,24 @@ def vc_multi( if "Success" in info: try: tgt_sr, audio_opt = opt - sf.write( - "%s/%s.%s" % (opt_root, os.path.basename(path), format1), - audio_opt, - tgt_sr, - ) + if (format1 in ["wav", "flac"]): + sf.write( + "%s/%s.%s" % (opt_root, os.path.basename(path), format1), + audio_opt, + tgt_sr, + ) + else: + path="%s/%s.wav" % (opt_root, os.path.basename(path)) + sf.write( + path, + audio_opt, + tgt_sr, + ) + if (os.path.exists(path)): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4] + ".%s" % format1) + ) except: info += traceback.format_exc() infos.append("%s->%s" % (os.path.basename(path), info)) @@ -1400,8 +1413,8 @@ with gr.Blocks() as app: "1、保留人声:不带和声的音频选这个,对主人声保留比HP5更好。内置HP2和HP3两个模型,HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点;
" "2、仅保留主人声:带和声的音频选这个,对主人声可能有削弱。内置HP5一个模型;
" "3、去混响、去延迟模型(by FoxJoy):
" - "  (1)MDX-Net:对于双通道混响是最好的选择,不能去除单通道混响;
" - " (234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底,DeReverb额外去除混响,可去除单声道混响,但是对高频重的板式混响去不干净。
" + "  (1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;
" + " (234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底,DeReverb额外去除混响,可去除单声道混响,但是对高频重的板式混响去不干净。
" "去混响/去延迟,附:
" "1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍;
" "2、MDX-Net-Dereverb模型挺慢的;
" diff --git a/infer_uvr5.py b/infer_uvr5.py index c188c1a..bcbf78c 100644 --- a/infer_uvr5.py +++ b/infer_uvr5.py @@ -123,14 +123,29 @@ class _audio_pre_: else: wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) print("%s instruments done" % name) - sf.write( - os.path.join( - ins_root, - "instrument_{}_{}.{}".format(name, self.data["agg"], format), - ), - (np.array(wav_instrument) * 32768).astype("int16"), - self.mp.param["sr"], - ) # + if(format in ["wav","flac"]): + sf.write( + os.path.join( + ins_root, + "instrument_{}_{}.{}".format(name, self.data["agg"], format), + ), + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) # + else: + path=os.path.join( + ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path , + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if(os.path.exists(path)): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4]+".%s"%format) + ) if vocal_root is not None: if self.data["high_end_process"].startswith("mirroring"): input_high_end_ = spec_utils.mirroring( @@ -142,14 +157,28 @@ class _audio_pre_: else: wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) print("%s vocals done" % name) - sf.write( - os.path.join( - vocal_root, "vocal_{}_{}.{}".format(name, self.data["agg"], format) - ), - (np.array(wav_vocals) * 32768).astype("int16"), - self.mp.param["sr"], - ) - + if(format in ["wav","flac"]): + sf.write( + os.path.join( + vocal_root, "vocal_{}_{}.{}".format(name, self.data["agg"], format) + ), + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + else: + path=os.path.join( + vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path , + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if(os.path.exists(path)): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4]+".%s"%format) + ) class _audio_pre_new: def __init__(self, agg, model_path, device, is_half): @@ -259,14 +288,29 @@ class _audio_pre_new: else: wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) print("%s instruments done" % name) - sf.write( - os.path.join( - ins_root, - "main_vocal_{}_{}.{}".format(name, self.data["agg"], format), - ), - (np.array(wav_instrument) * 32768).astype("int16"), - self.mp.param["sr"], - ) # + if(format in ["wav","flac"]): + sf.write( + os.path.join( + ins_root, + "instrument_{}_{}.{}".format(name, self.data["agg"], format), + ), + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) # + else: + path=os.path.join( + ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path , + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if(os.path.exists(path)): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4]+".%s"%format) + ) if vocal_root is not None: if self.data["high_end_process"].startswith("mirroring"): input_high_end_ = spec_utils.mirroring( @@ -278,14 +322,28 @@ class _audio_pre_new: else: wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) print("%s vocals done" % name) - sf.write( - os.path.join( - vocal_root, "others_{}_{}.{}".format(name, self.data["agg"], format) - ), - (np.array(wav_vocals) * 32768).astype("int16"), - self.mp.param["sr"], - ) - + if(format in ["wav","flac"]): + sf.write( + os.path.join( + vocal_root, "vocal_{}_{}.{}".format(name, self.data["agg"], format) + ), + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + else: + path=os.path.join( + vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path , + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if(os.path.exists(path)): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4]+".%s"%format) + ) if __name__ == "__main__": device = "cuda"