From c054138f0faec6d71cb330b1298bf8e691232ef6 Mon Sep 17 00:00:00 2001 From: Ftps Date: Sat, 19 Aug 2023 22:47:10 +0900 Subject: [PATCH] remove moved func --- infer-web.py | 465 ++------------------------------------------------- 1 file changed, 18 insertions(+), 447 deletions(-) diff --git a/infer-web.py b/infer-web.py index c5fd117..1959ea0 100644 --- a/infer-web.py +++ b/infer-web.py @@ -4,7 +4,7 @@ import sys now_dir = os.getcwd() sys.path.append(now_dir) -import traceback, pdb +import traceback import warnings import numpy as np @@ -19,25 +19,19 @@ from subprocess import Popen from time import sleep import faiss -import ffmpeg import gradio as gr -import soundfile as sf -from config import Config +from configs.config import Config import fairseq from i18n import I18nAuto -from lib.infer_pack.models import ( - SynthesizerTrnMs256NSFsid, - SynthesizerTrnMs256NSFsid_nono, - SynthesizerTrnMs768NSFsid, - SynthesizerTrnMs768NSFsid_nono, -) -from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM -from infer_uvr5 import _audio_pre_, _audio_pre_new -from lib.audio import load_audio from lib.train.process_ckpt import change_info, extract_small_model, merge, show_info -from vc_infer_pipeline import VC from sklearn.cluster import MiniBatchKMeans +from dotenv import load_dotenv + +from infer.modules.vc.modules import VC +from infer.modules.uvr5.modules import uvr +from infer.modules.onnx.export import export_onnx + logging.getLogger("numba").setLevel(logging.WARNING) now_dir = os.getcwd() @@ -48,12 +42,13 @@ shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_error os.makedirs(tmp, exist_ok=True) os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) -os.environ["TEMP"] = tmp warnings.filterwarnings("ignore") torch.manual_seed(114514) - +load_dotenv() config = Config() +vc = VC(config) + if config.dml == True: def forward_dml(ctx, x, scale): @@ -127,27 +122,10 @@ class ToolButton(gr.Button, gr.components.FormComponent): return "button" -hubert_model = None - - -def load_hubert(): - global hubert_model - models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task( - ["hubert_base.pt"], - suffix="", - ) - hubert_model = models[0] - hubert_model = hubert_model.to(config.device) - if config.is_half: - hubert_model = hubert_model.half() - else: - hubert_model = hubert_model.float() - hubert_model.eval() - - -weight_root = "weights" -weight_uvr5_root = "uvr5_weights" +weight_root = os.getenv("weight_root") +weight_uvr5_root = os.getenv("weight_uvr5_root") index_root = "logs" + names = [] for name in os.listdir(weight_root): if name.endswith(".pth"): @@ -162,365 +140,6 @@ for name in os.listdir(weight_uvr5_root): if name.endswith(".pth") or "onnx" in name: uvr5_names.append(name.replace(".pth", "")) -cpt = None - - -def vc_single( - sid, - input_audio_path, - f0_up_key, - f0_file, - f0_method, - file_index, - file_index2, - # file_big_npy, - index_rate, - filter_radius, - resample_sr, - rms_mix_rate, - protect, -): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 - global tgt_sr, net_g, vc, hubert_model, version, cpt - if input_audio_path is None: - return "You need to upload an audio", None - f0_up_key = int(f0_up_key) - try: - audio = load_audio(input_audio_path, 16000) - audio_max = np.abs(audio).max() / 0.95 - if audio_max > 1: - audio /= audio_max - times = [0, 0, 0] - if not hubert_model: - load_hubert() - if_f0 = cpt.get("f0", 1) - file_index = ( - ( - file_index.strip(" ") - .strip('"') - .strip("\n") - .strip('"') - .strip(" ") - .replace("trained", "added") - ) - if file_index != "" - else file_index2 - ) # 防止小白写错,自动帮他替换掉 - # file_big_npy = ( - # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - # ) - audio_opt = vc.pipeline( - hubert_model, - net_g, - sid, - audio, - input_audio_path, - times, - f0_up_key, - f0_method, - file_index, - # file_big_npy, - index_rate, - if_f0, - filter_radius, - tgt_sr, - resample_sr, - rms_mix_rate, - version, - protect, - f0_file=f0_file, - ) - index_info = ( - "Using index:%s." % file_index - if os.path.exists(file_index) - else "Index not used." - ) - return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % ( - index_info, - times[0], - times[1], - times[2], - ), ( - resample_sr if resample_sr >= 16000 and tgt_sr != resample_sr else tgt_sr, - audio_opt, - ) - except: - info = traceback.format_exc() - print(info) - return info, (None, None) - - -def vc_multi( - sid, - dir_path, - opt_root, - paths, - f0_up_key, - f0_method, - file_index, - file_index2, - # file_big_npy, - index_rate, - filter_radius, - resample_sr, - rms_mix_rate, - protect, - format1, -): - try: - dir_path = ( - dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - ) # 防止小白拷路径头尾带了空格和"和回车 - opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - os.makedirs(opt_root, exist_ok=True) - try: - if dir_path != "": - paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)] - else: - paths = [path.name for path in paths] - except: - traceback.print_exc() - paths = [path.name for path in paths] - infos = [] - for path in paths: - info, opt = vc_single( - sid, - path, - f0_up_key, - None, - f0_method, - file_index, - file_index2, - # file_big_npy, - index_rate, - filter_radius, - resample_sr, - rms_mix_rate, - protect, - ) - if "Success" in info: - try: - tgt_sr, audio_opt = opt - if format1 in ["wav", "flac"]: - sf.write( - "%s/%s.%s" % (opt_root, os.path.basename(path), format1), - audio_opt, - tgt_sr, - ) - else: - path = "%s/%s.wav" % (opt_root, os.path.basename(path)) - sf.write( - path, - audio_opt, - tgt_sr, - ) - if os.path.exists(path): - os.system( - "ffmpeg -i %s -vn %s -q:a 2 -y" - % (path, path[:-4] + ".%s" % format1) - ) - except: - info += traceback.format_exc() - infos.append("%s->%s" % (os.path.basename(path), info)) - yield "\n".join(infos) - yield "\n".join(infos) - except: - yield traceback.format_exc() - - -def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): - infos = [] - try: - inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - save_root_vocal = ( - save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - ) - save_root_ins = ( - save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - ) - if model_name == "onnx_dereverb_By_FoxJoy": - from MDXNet import MDXNetDereverb - - pre_fun = MDXNetDereverb(15) - else: - func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new - pre_fun = func( - agg=int(agg), - model_path=os.path.join(weight_uvr5_root, model_name + ".pth"), - device=config.device, - is_half=config.is_half, - ) - if inp_root != "": - paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)] - else: - paths = [path.name for path in paths] - for path in paths: - inp_path = os.path.join(inp_root, path) - need_reformat = 1 - done = 0 - try: - info = ffmpeg.probe(inp_path, cmd="ffprobe") - if ( - info["streams"][0]["channels"] == 2 - and info["streams"][0]["sample_rate"] == "44100" - ): - need_reformat = 0 - pre_fun._path_audio_( - inp_path, save_root_ins, save_root_vocal, format0 - ) - done = 1 - except: - need_reformat = 1 - traceback.print_exc() - if need_reformat == 1: - tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path)) - os.system( - "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y" - % (inp_path, tmp_path) - ) - inp_path = tmp_path - try: - if done == 0: - pre_fun._path_audio_( - inp_path, save_root_ins, save_root_vocal, format0 - ) - infos.append("%s->Success" % (os.path.basename(inp_path))) - yield "\n".join(infos) - except: - infos.append( - "%s->%s" % (os.path.basename(inp_path), traceback.format_exc()) - ) - yield "\n".join(infos) - except: - infos.append(traceback.format_exc()) - yield "\n".join(infos) - finally: - try: - if model_name == "onnx_dereverb_By_FoxJoy": - del pre_fun.pred.model - del pre_fun.pred.model_ - else: - del pre_fun.model - del pre_fun - except: - traceback.print_exc() - print("clean_empty_cache") - if torch.cuda.is_available(): - torch.cuda.empty_cache() - yield "\n".join(infos) - - -def get_index_path_from_model(sid): - sel_index_path = "" - name = os.path.join("logs", sid.split(".")[0], "") - # print(name) - for f in index_paths: - if name in f: - # print("selected index path:", f) - sel_index_path = f - break - return sel_index_path - - -# 一个选项卡全局只能有一个音色 -def get_vc(sid, to_return_protect0, to_return_protect1): - global n_spk, tgt_sr, net_g, vc, cpt, version - if sid == "" or sid == []: - global hubert_model - if hubert_model is not None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 - print("clean_empty_cache") - del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt - hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None - if torch.cuda.is_available(): - torch.cuda.empty_cache() - ###楼下不这么折腾清理不干净 - if_f0 = cpt.get("f0", 1) - version = cpt.get("version", "v1") - if version == "v1": - if if_f0 == 1: - net_g = SynthesizerTrnMs256NSFsid( - *cpt["config"], is_half=config.is_half - ) - else: - net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) - elif version == "v2": - if if_f0 == 1: - net_g = SynthesizerTrnMs768NSFsid( - *cpt["config"], is_half=config.is_half - ) - else: - net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) - del net_g, cpt - if torch.cuda.is_available(): - torch.cuda.empty_cache() - return ( - {"visible": False, "__type__": "update"}, - { - "visible": True, - "value": to_return_protect0, - "__type__": "update", - }, - { - "visible": True, - "value": to_return_protect1, - "__type__": "update", - }, - "", - "", - ) - person = "%s/%s" % (weight_root, sid) - print("loading %s" % person) - - cpt = torch.load(person, map_location="cpu") - tgt_sr = cpt["config"][-1] - cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk - if_f0 = cpt.get("f0", 1) - if if_f0 == 0: - to_return_protect0 = to_return_protect1 = { - "visible": False, - "value": 0.33, - "__type__": "update", - } - else: - to_return_protect0 = { - "visible": True, - "value": to_return_protect0, - "__type__": "update", - } - to_return_protect1 = { - "visible": True, - "value": to_return_protect1, - "__type__": "update", - } - version = cpt.get("version", "v1") - if version == "v1": - if if_f0 == 1: - net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) - else: - net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) - elif version == "v2": - if if_f0 == 1: - net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) - else: - net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) - del net_g.enc_q - print(net_g.load_state_dict(cpt["weight"], strict=False)) - net_g.eval().to(config.device) - if config.is_half: - net_g = net_g.half() - else: - net_g = net_g.float() - vc = VC(tgt_sr, config) - n_spk = cpt["config"][-3] - index = {"value": get_index_path_from_model(sid), "__type__": "update"} - return ( - {"visible": True, "maximum": n_spk, "__type__": "update"}, - to_return_protect0, - to_return_protect1, - index, - index, - ) - - def change_choices(): names = [] for name in os.listdir(weight_root): @@ -1385,54 +1004,6 @@ def change_f0_method(f0method8): return {"visible": visible, "__type__": "update"} -def export_onnx(ModelPath, ExportedPath): - global cpt - cpt = torch.load(ModelPath, map_location="cpu") - cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] - vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768 - - test_phone = torch.rand(1, 200, vec_channels) # hidden unit - test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用) - test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹) - test_pitchf = torch.rand(1, 200) # nsf基频 - test_ds = torch.LongTensor([0]) # 说话人ID - test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子) - - device = "cpu" # 导出时设备(不影响使用模型) - - net_g = SynthesizerTrnMsNSFsidM( - *cpt["config"], is_half=False, version=cpt.get("version", "v1") - ) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16) - net_g.load_state_dict(cpt["weight"], strict=False) - input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"] - output_names = [ - "audio", - ] - # net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出 - torch.onnx.export( - net_g, - ( - test_phone.to(device), - test_phone_lengths.to(device), - test_pitch.to(device), - test_pitchf.to(device), - test_ds.to(device), - test_rnd.to(device), - ), - ExportedPath, - dynamic_axes={ - "phone": [1], - "pitch": [1], - "pitchf": [1], - "rnd": [2], - }, - do_constant_folding=False, - opset_version=13, - verbose=False, - input_names=input_names, - output_names=output_names, - ) - return "Finished" with gr.Blocks(title="RVC WebUI") as app: @@ -1551,7 +1122,7 @@ with gr.Blocks(title="RVC WebUI") as app: vc_output1 = gr.Textbox(label=i18n("输出信息")) vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) but0.click( - vc_single, + vc.vc_single, [ spk_item, input_audio0, @@ -1671,7 +1242,7 @@ with gr.Blocks(title="RVC WebUI") as app: but1 = gr.Button(i18n("转换"), variant="primary") vc_output3 = gr.Textbox(label=i18n("输出信息")) but1.click( - vc_multi, + vc.vc_multi, [ spk_item, dir_input, @@ -1693,7 +1264,7 @@ with gr.Blocks(title="RVC WebUI") as app: api_name="infer_convert_batch", ) sid0.change( - fn=get_vc, + fn=vc.get_vc, inputs=[sid0, protect0, protect1], outputs=[spk_item, protect0, protect1, file_index2, file_index4], ) @@ -1967,7 +1538,7 @@ with gr.Blocks(title="RVC WebUI") as app: info3, api_name="train_start", ) - but4.click(train_index, [exp_dir1, version19], info3) + but4.click(train_index, [exp_dir1, version19, config.n_cpu], info3) but5.click( train1key, [