repair app.py

This commit is contained in:
Ftps 2023-08-27 19:49:18 +09:00
parent 3f3177b5ce
commit 9a10795908
2 changed files with 33 additions and 192 deletions

199
app.py
View File

@ -1,22 +1,15 @@
import os import os
import torch
# os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt") # os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt")
import gradio as gr import gradio as gr
import librosa
import numpy as np
import logging import logging
from fairseq import checkpoint_utils
from lib.train.vc_infer_pipeline import VC from configs.config import Config
import traceback
from config import defaultconfig as config from i18n.i18n import I18nAuto
from lib.infer_pack.models import ( from dotenv import load_dotenv
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono, from infer.modules.vc.modules import VC
SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono,
)
from i18n import I18nAuto
logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("markdown_it").setLevel(logging.WARNING) logging.getLogger("markdown_it").setLevel(logging.WARNING)
@ -26,8 +19,12 @@ logging.getLogger("matplotlib").setLevel(logging.WARNING)
i18n = I18nAuto() i18n = I18nAuto()
i18n.print() i18n.print()
weight_root = "weights" load_dotenv()
weight_uvr5_root = "uvr5_weights" config = Config()
vc = VC(config)
weight_root = os.getenv("weight_root")
weight_uvr5_root = os.getenv("weight_uvr5_root")
index_root = "logs" index_root = "logs"
names = [] names = []
hubert_model = None hubert_model = None
@ -41,168 +38,6 @@ for root, dirs, files in os.walk(index_root, topdown=False):
index_paths.append("%s/%s" % (root, name)) index_paths.append("%s/%s" % (root, name))
def _build_synthesizer(cpt):
    """Instantiate the synthesizer class selected by a checkpoint dict.

    Reads ``cpt["config"]`` and the optional ``"f0"`` (default ``1``) and
    ``"version"`` (default ``"v1"``) entries.

    Returns:
        A freshly constructed synthesizer, or ``None`` when the version is
        neither ``"v1"`` nor ``"v2"``.
    """
    uses_f0 = cpt.get("f0", 1) == 1
    version = cpt.get("version", "v1")
    if version == "v1":
        if uses_f0:
            return SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
        return SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    if version == "v2":
        if uses_f0:
            return SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
        return SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    return None


def get_vc(sid):
    """Load the voice model named by ``sid``, or unload the current one.

    Args:
        sid: Checkpoint file name inside ``weight_root``. ``""`` or ``[]``
            means "no model selected" and triggers the unload path.

    Returns:
        An update dict (``"__type__": "update"``) for the component wired to
        this callback: ``visible`` is ``False`` when nothing is selected,
        otherwise ``True`` with ``maximum`` set to the checkpoint's speaker
        count.

    Raises:
        ValueError: If the checkpoint declares a version other than ``"v1"``
            or ``"v2"``. The previously loaded model is left untouched.
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model
    if sid == "" or sid == []:
        # This callback may be polled, so only clean up when a model was
        # actually loaded before (i.e. sid switched from "model" to "none").
        if hubert_model is not None:
            print("clean_empty_cache")
            # globals().get avoids a NameError when no checkpoint was ever
            # loaded; rebinding to None drops the references just like the
            # former `del` + reassignment, without failing on unbound names.
            stale_cpt = globals().get("cpt")
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Without the rebuild-and-drop below, the cleanup is not thorough.
            if stale_cpt is not None:
                version = stale_cpt.get("version", "v1")
                net_g = _build_synthesizer(stale_cpt)
                net_g = None
            del stale_cpt
            cpt = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        return {"visible": False, "__type__": "update"}

    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # that come from a trusted source.
    checkpoint = torch.load(person, map_location="cpu")
    # Overwrite the configured speaker count with the real embedding size.
    checkpoint["config"][-3] = checkpoint["weight"]["emb_g.weight"].shape[0]  # n_spk
    model = _build_synthesizer(checkpoint)
    if model is None:
        # Fail before touching any global so the current model stays usable.
        raise ValueError(
            "Unsupported model version: %r" % (checkpoint.get("version", "v1"),)
        )

    cpt = checkpoint
    tgt_sr = cpt["config"][-1]
    version = cpt.get("version", "v1")
    net_g = model
    del model  # keep a single reference so rebinding net_g can free memory
    # enc_q is removed before loading; strict=False tolerates the key mismatch.
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    net_g = net_g.half() if config.is_half else net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    return {"visible": True, "maximum": n_spk, "__type__": "update"}
def load_hubert():
    """Load ``hubert_base.pt`` and publish it as the global ``hubert_model``.

    The model is moved to ``config.device``, cast to half or full precision
    according to ``config.is_half`` and switched to eval mode. The global is
    assigned only after all of that succeeded, so a failure part-way through
    leaves ``hubert_model`` unchanged and the next caller retries the load
    instead of using a half-prepared model.
    """
    global hubert_model
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    model = models[0].to(config.device)
    model = model.half() if config.is_half else model.float()
    model.eval()
    hubert_model = model
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
    """Convert one uploaded clip with the currently loaded voice model.

    Args:
        sid: Speaker id forwarded to ``vc.pipeline``.
        input_audio_path: ``(sample_rate, samples)`` pair, or ``None`` when
            nothing was uploaded. Samples are divided by 32768.0 (presumably
            int16 PCM from the Gradio audio widget -- TODO confirm).
        f0_up_key: Transpose value; coerced with ``int()`` before use.
        f0_file: Forwarded to ``vc.pipeline`` as ``f0_file``.
        f0_method: Forwarded to ``vc.pipeline``.
        file_index: Index path typed by the user. Surrounding spaces, quotes
            and newlines are stripped and ``"trained"`` is replaced by
            ``"added"``. When empty, ``file_index2`` is used instead.
        file_index2: Fallback index path.
        index_rate: Forwarded to ``vc.pipeline``.
        filter_radius: Forwarded to ``vc.pipeline``.
        resample_sr: When ``>= 16000`` and different from the model's rate it
            is reported as the output sample rate.
        rms_mix_rate: Forwarded to ``vc.pipeline``.
        protect: Forwarded to ``vc.pipeline``.

    Returns:
        ``(message, audio)``. On success ``audio`` is
        ``(sample_rate, converted_audio)``; when no input was given it is
        ``None``; on failure ``message`` is the traceback text and ``audio``
        is ``(None, None)``.
    """
    if input_audio_path is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        audio = input_audio_path[1] / 32768.0
        if len(audio.shape) == 2:
            # Two-dimensional input: average over the last axis.
            audio = np.mean(audio, -1)
        audio = librosa.resample(audio, orig_sr=input_audio_path[0], target_sr=16000)
        # Scale down only when the peak would exceed 0.95.
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        times = [0, 0, 0]
        if hubert_model is None:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        file_index = (
            (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )
            if file_index != ""
            else file_index2
        )  # guard against beginners' typos: fix the path up automatically
        # file_big_npy = (
        #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        # )
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            # file_big_npy,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            f0_file=f0_file,
        )
        # Use a local for the reported rate: rebinding the global tgt_sr
        # would corrupt the model's sample rate for every later conversion.
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            output_sr = resample_sr
        else:
            output_sr = tgt_sr
        # The truthiness check keeps os.path.exists from raising on None.
        index_info = (
            "Using index:%s." % file_index
            if file_index and os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (output_sr, audio_opt)
    except Exception:
        # Report the failure in the UI instead of crashing the callback;
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
app = gr.Blocks() app = gr.Blocks()
with app: with app:
with gr.Tabs(): with gr.Tabs():
@ -223,11 +58,7 @@ with app:
visible=False, visible=False,
interactive=True, interactive=True,
) )
sid.change( sid.change(fn=vc.get_vc, inputs=[sid], outputs=[spk_item])
fn=get_vc,
inputs=[sid],
outputs=[spk_item],
)
gr.Markdown( gr.Markdown(
value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ") value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
) )
@ -294,7 +125,7 @@ with app:
vc_output1 = gr.Textbox(label=i18n("输出信息")) vc_output1 = gr.Textbox(label=i18n("输出信息"))
vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
but0.click( but0.click(
vc_single, vc.vc_single,
[ [
spk_item, spk_item,
vc_input3, vc_input3,

View File

@ -29,7 +29,9 @@ class VC:
self.config = config self.config = config
def get_vc(self, sid, to_return_protect0, to_return_protect1): def get_vc(self, sid, *to_return_protect):
print("aosdijfaofjoaij")
print(to_return_protect)
person = f'{os.getenv("weight_root")}/{sid}' person = f'{os.getenv("weight_root")}/{sid}'
print(f"loading {person}") print(f"loading {person}")
@ -41,12 +43,16 @@ class VC:
to_return_protect0 = { to_return_protect0 = {
"visible": self.if_f0 != 0, "visible": self.if_f0 != 0,
"value": to_return_protect0 if self.if_f0 != 0 else 0.5, "value": to_return_protect[0]
if self.if_f0 != 0 and to_return_protect
else 0.5,
"__type__": "update", "__type__": "update",
} }
to_return_protect1 = { to_return_protect1 = {
"visible": self.if_f0 != 0, "visible": self.if_f0 != 0,
"value": to_return_protect1 if self.if_f0 != 0 else 0.33, "value": to_return_protect[1]
if self.if_f0 != 0 and to_return_protect
else 0.33,
"__type__": "update", "__type__": "update",
} }
@ -75,12 +81,16 @@ class VC:
index = {"value": get_index_path_from_model(sid), "__type__": "update"} index = {"value": get_index_path_from_model(sid), "__type__": "update"}
return ( return (
(
{"visible": True, "maximum": n_spk, "__type__": "update"}, {"visible": True, "maximum": n_spk, "__type__": "update"},
to_return_protect0, to_return_protect0,
to_return_protect1, to_return_protect1,
index, index,
index, index,
) )
if to_return_protect
else {"visible": True, "maximum": n_spk, "__type__": "update"}
)
def vc_single( def vc_single(
self, self,