From 9a10795908c6fceef4186c186bd97c781b87d5fc Mon Sep 17 00:00:00 2001 From: Ftps Date: Sun, 27 Aug 2023 19:49:18 +0900 Subject: [PATCH] repair app.py --- app.py | 199 +++--------------------------------- infer/modules/vc/modules.py | 26 +++-- 2 files changed, 33 insertions(+), 192 deletions(-) diff --git a/app.py b/app.py index 69bb617..e4a6415 100644 --- a/app.py +++ b/app.py @@ -1,22 +1,15 @@ import os -import torch # os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt") import gradio as gr -import librosa -import numpy as np import logging -from fairseq import checkpoint_utils -from lib.train.vc_infer_pipeline import VC -import traceback -from config import defaultconfig as config -from lib.infer_pack.models import ( - SynthesizerTrnMs256NSFsid, - SynthesizerTrnMs256NSFsid_nono, - SynthesizerTrnMs768NSFsid, - SynthesizerTrnMs768NSFsid_nono, -) -from i18n import I18nAuto + +from configs.config import Config + +from i18n.i18n import I18nAuto +from dotenv import load_dotenv + +from infer.modules.vc.modules import VC logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("markdown_it").setLevel(logging.WARNING) @@ -26,8 +19,12 @@ logging.getLogger("matplotlib").setLevel(logging.WARNING) i18n = I18nAuto() i18n.print() -weight_root = "weights" -weight_uvr5_root = "uvr5_weights" +load_dotenv() +config = Config() +vc = VC(config) + +weight_root = os.getenv("weight_root") +weight_uvr5_root = os.getenv("weight_uvr5_root") index_root = "logs" names = [] hubert_model = None @@ -41,168 +38,6 @@ for root, dirs, files in os.walk(index_root, topdown=False): index_paths.append("%s/%s" % (root, name)) -def get_vc(sid): - global n_spk, tgt_sr, net_g, vc, cpt, version - if sid == "" or sid == []: - global hubert_model - if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 - print("clean_empty_cache") - del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt - hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None - if torch.cuda.is_available(): - torch.cuda.empty_cache() - ###楼下不这么折腾清理不干净 - if_f0 = cpt.get("f0", 1) - version = cpt.get("version", "v1") - if version == "v1": - if if_f0 == 1: - net_g = SynthesizerTrnMs256NSFsid( - *cpt["config"], is_half=config.is_half - ) - else: - net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) - elif version == "v2": - if if_f0 == 1: - net_g = SynthesizerTrnMs768NSFsid( - *cpt["config"], is_half=config.is_half - ) - else: - net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) - del net_g, cpt - if torch.cuda.is_available(): - torch.cuda.empty_cache() - cpt = None - return {"visible": False, "__type__": "update"} - person = "%s/%s" % (weight_root, sid) - print("loading %s" % person) - cpt = torch.load(person, map_location="cpu") - tgt_sr = cpt["config"][-1] - cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk - if_f0 = cpt.get("f0", 1) - version = cpt.get("version", "v1") - if version == "v1": - if if_f0 == 1: - net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) - else: - net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) - elif version == "v2": - if if_f0 == 1: - net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) - else: - net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) - del net_g.enc_q - print(net_g.load_state_dict(cpt["weight"], strict=False)) - net_g.eval().to(config.device) - if config.is_half: - net_g = net_g.half() - else: - net_g = net_g.float() - vc = VC(tgt_sr, config) - n_spk = cpt["config"][-3] - return {"visible": True, "maximum": n_spk, "__type__": "update"} - - -def load_hubert(): - global hubert_model - models, _, _ = checkpoint_utils.load_model_ensemble_and_task( - ["hubert_base.pt"], - suffix="", - ) - hubert_model = models[0] - hubert_model = hubert_model.to(config.device) - if config.is_half: - hubert_model = hubert_model.half() - else: - hubert_model = hubert_model.float() - hubert_model.eval() - - -def vc_single( - sid, - input_audio_path, - f0_up_key, - f0_file, - f0_method, - file_index, - file_index2, - # file_big_npy, - index_rate, - filter_radius, - resample_sr, - rms_mix_rate, - protect, -): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 - global tgt_sr, net_g, vc, hubert_model, version - if input_audio_path is None: - return "You need to upload an audio", None - f0_up_key = int(f0_up_key) - try: - audio = input_audio_path[1] / 32768.0 - if len(audio.shape) == 2: - audio = np.mean(audio, -1) - audio = librosa.resample(audio, orig_sr=input_audio_path[0], target_sr=16000) - audio_max = np.abs(audio).max() / 0.95 - if audio_max > 1: - audio /= audio_max - times = [0, 0, 0] - if hubert_model == None: - load_hubert() - if_f0 = cpt.get("f0", 1) - file_index = ( - ( - file_index.strip(" ") - .strip('"') - .strip("\n") - .strip('"') - .strip(" ") - .replace("trained", "added") - ) - if file_index != "" - else file_index2 - ) # 防止小白写错,自动帮他替换掉 - # file_big_npy = ( - # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - # ) - audio_opt = vc.pipeline( - hubert_model, - net_g, - sid, - audio, - input_audio_path, - times, - f0_up_key, - f0_method, - file_index, - # file_big_npy, - index_rate, - if_f0, - filter_radius, - tgt_sr, - resample_sr, - rms_mix_rate, - version, - protect, - f0_file=f0_file, - ) - if resample_sr >= 16000 and tgt_sr != resample_sr: - tgt_sr = resample_sr - index_info = ( - "Using index:%s." % file_index - if os.path.exists(file_index) - else "Index not used." - ) - return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % ( - index_info, - times[0], - times[1], - times[2], - ), (tgt_sr, audio_opt) - except: - info = traceback.format_exc() - print(info) - return info, (None, None) - - app = gr.Blocks() with app: with gr.Tabs(): @@ -223,11 +58,7 @@ with app: visible=False, interactive=True, ) - sid.change( - fn=get_vc, - inputs=[sid], - outputs=[spk_item], - ) + sid.change(fn=vc.get_vc, inputs=[sid], outputs=[spk_item]) gr.Markdown( value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ") ) @@ -294,7 +125,7 @@ with app: vc_output1 = gr.Textbox(label=i18n("输出信息")) vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) but0.click( - vc_single, + vc.vc_single, [ spk_item, vc_input3, diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index 2511214..0fb7c0a 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ -29,7 +29,9 @@ class VC: self.config = config - def get_vc(self, sid, to_return_protect0, to_return_protect1): + def get_vc(self, sid, *to_return_protect): + print("aosdijfaofjoaij") + print(to_return_protect) person = f'{os.getenv("weight_root")}/{sid}' print(f"loading {person}") @@ -41,12 +43,16 @@ class VC: to_return_protect0 = { "visible": self.if_f0 != 0, - "value": to_return_protect0 if self.if_f0 != 0 else 0.5, + "value": to_return_protect[0] + if self.if_f0 != 0 and to_return_protect + else 0.5, "__type__": "update", } to_return_protect1 = { "visible": self.if_f0 != 0, - "value": to_return_protect1 if self.if_f0 != 0 else 0.33, + "value": to_return_protect[1] + if self.if_f0 != 0 and to_return_protect + else 0.33, "__type__": "update", } @@ -75,11 +81,15 @@ class VC: index = {"value": get_index_path_from_model(sid), "__type__": "update"} return ( - {"visible": True, "maximum": n_spk, "__type__": "update"}, - to_return_protect0, - to_return_protect1, - index, - index, + ( + {"visible": True, "maximum": n_spk, "__type__": "update"}, + to_return_protect0, + to_return_protect1, + index, + index, + ) + if to_return_protect + else {"visible": True, "maximum": n_spk, "__type__": "update"} ) def vc_single(