Format code (#274)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Authored by github-actions[bot] on 2023-05-12 19:43:05 +00:00; committed by GitHub
parent 568378761b
commit af41184320
3 changed files with 90 additions and 49 deletions

File 1 of 3

@@ -18,9 +18,12 @@ from fairseq import checkpoint_utils
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-if torch.cuda.is_available():device="cuda"
-elif torch.backends.mps.is_available():device="mps"
-else:device="cpu"
+if torch.cuda.is_available():
+    device = "cuda"
+elif torch.backends.mps.is_available():
+    device = "mps"
+else:
+    device = "cpu"
 f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
@@ -64,7 +67,7 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
 model = models[0]
 model = model.to(device)
 printt("move model to %s" % device)
-if device not in ["mps","cpu"]:
+if device not in ["mps", "cpu"]:
     model = model.half()
 model.eval()
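
For reference, the device-selection pattern these two hunks reformat, as a minimal standalone sketch (assumes a recent PyTorch build that exposes torch.backends.mps; not part of the commit):

import torch

# Prefer CUDA, then Apple-Silicon MPS, and fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# fp16 halves memory and speeds up inference on CUDA; MPS and CPU
# stay in fp32, matching the `if device not in ["mps", "cpu"]` guard.
use_half = device not in ["mps", "cpu"]
print("device=%s, fp16=%s" % (device, use_half))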

File 2 of 3

@@ -11,8 +11,8 @@ now_dir = os.getcwd()
 sys.path.append(now_dir)
 tmp = os.path.join(now_dir, "TEMP")
 shutil.rmtree(tmp, ignore_errors=True)
-shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack"%(now_dir), ignore_errors=True)
-shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack"%(now_dir) , ignore_errors=True)
+shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
+shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
 os.makedirs(tmp, exist_ok=True)
 os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
 os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
@@ -121,11 +121,11 @@ names = []
 for name in os.listdir(weight_root):
     if name.endswith(".pth"):
         names.append(name)
-index_paths=[]
+index_paths = []
 for root, dirs, files in os.walk(index_root, topdown=False):
     for name in files:
         if name.endswith(".index") and "trained" not in name:
-            index_paths.append("%s/%s"%(root,name))
+            index_paths.append("%s/%s" % (root, name))
 uvr5_names = []
 for name in os.listdir(weight_uvr5_root):
     if name.endswith(".pth"):
@@ -156,13 +156,17 @@ def vc_single(
             load_hubert()
         if_f0 = cpt.get("f0", 1)
         file_index = (
-            file_index.strip(" ")
-            .strip('"')
-            .strip("\n")
-            .strip('"')
-            .strip(" ")
-            .replace("trained", "added")
-        )if file_index!=""else file_index2  # in case a novice mistypes the path, auto-substitute it for them
+            (
+                file_index.strip(" ")
+                .strip('"')
+                .strip("\n")
+                .strip('"')
+                .strip(" ")
+                .replace("trained", "added")
+            )
+            if file_index != ""
+            else file_index2
+        )  # in case a novice mistypes the path, auto-substitute it for them
         # file_big_npy = (
         #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
         # )
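
The expression above sanitizes a user-pasted index path. A self-contained sketch of the same logic; normalize_index_path is a hypothetical helper name, the web UI inlines the expression:

def normalize_index_path(file_index, file_index2):
    # Strip pasted whitespace/quotes/newlines and swap "trained" for
    # "added", falling back to the auto-detected path when the box is empty.
    return (
        (
            file_index.strip(" ")
            .strip('"')
            .strip("\n")
            .strip('"')
            .strip(" ")
            .replace("trained", "added")
        )
        if file_index != ""
        else file_index2
    )

print(normalize_index_path(' "logs/mi-test/trained_IVF256.index"\n', ""))
# -> logs/mi-test/added_IVF256.index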
@@ -184,10 +188,19 @@ def vc_single(
             resample_sr,
             f0_file=f0_file,
         )
-        if(resample_sr>=16000 and tgt_sr!=resample_sr):
-            tgt_sr=resample_sr
-        index_info="Using index:%s."%file_index if os.path.exists(file_index)else"Index not used."
-        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss"%(index_info,times[0],times[1],times[2]), (tgt_sr, audio_opt)
+        if resample_sr >= 16000 and tgt_sr != resample_sr:
+            tgt_sr = resample_sr
+        index_info = (
+            "Using index:%s." % file_index
+            if os.path.exists(file_index)
+            else "Index not used."
+        )
+        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
+            index_info,
+            times[0],
+            times[1],
+            times[2],
+        ), (tgt_sr, audio_opt)
     except:
         info = traceback.format_exc()
         print(info)
@@ -237,7 +250,7 @@ def vc_multi(
                 filter_radius,
                 resample_sr,
             )
-            if "Success"in info:
+            if "Success" in info:
                 try:
                     tgt_sr, audio_opt = opt
                     wavfile.write(
@@ -323,7 +336,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
 # Globally, each tab can hold only one active voice model.
 def get_vc(sid):
     global n_spk, tgt_sr, net_g, vc, cpt
-    if sid == ""or sid==[]:
+    if sid == "" or sid == []:
         global hubert_model
         if hubert_model != None:  # because of polling, check whether sid switched from a loaded model to none
             print("clean_empty_cache")
@@ -371,12 +384,15 @@ def change_choices():
     for name in os.listdir(weight_root):
         if name.endswith(".pth"):
             names.append(name)
-    index_paths=[]
+    index_paths = []
     for root, dirs, files in os.walk(index_root, topdown=False):
         for name in files:
             if name.endswith(".index") and "trained" not in name:
                 index_paths.append("%s/%s" % (root, name))
-    return {"choices": sorted(names), "__type__": "update"},{"choices": sorted(index_paths), "__type__": "update"}
+    return {"choices": sorted(names), "__type__": "update"}, {
+        "choices": sorted(index_paths),
+        "__type__": "update",
+    }
 
 
 def clean():
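
change_choices returns plain Gradio update dicts. A minimal sketch of the pattern; in Gradio 3.x, gr.update(choices=...) produces the same "__type__": "update" shape:

def change_choices_sketch(weight_names, index_paths):
    # The "__type__": "update" marker tells Gradio to patch each
    # component's choices in place instead of replacing its value.
    return (
        {"choices": sorted(weight_names), "__type__": "update"},
        {"choices": sorted(index_paths), "__type__": "update"},
    )

print(change_choices_sketch(["b.pth", "a.pth"], ["logs/a/added_IVF256.index"]))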
@@ -1096,7 +1112,7 @@ with gr.Blocks() as app:
                     value="pm",
                     interactive=True,
                 )
-                filter_radius0=gr.Slider(
+                filter_radius0 = gr.Slider(
                     minimum=0,
                     maximum=7,
                     label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
@@ -1115,7 +1131,9 @@ with gr.Blocks() as app:
                     choices=sorted(index_paths),
                     interactive=True,
                 )
-                refresh_button.click(fn=change_choices, inputs=[], outputs=[sid0, file_index2])
+                refresh_button.click(
+                    fn=change_choices, inputs=[], outputs=[sid0, file_index2]
+                )
                 # file_big_npy1 = gr.Textbox(
                 #     label=i18n("特征文件路径"),
                 #     value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
@@ -1128,7 +1146,7 @@ with gr.Blocks() as app:
                     value=0.76,
                     interactive=True,
                 )
-                resample_sr0=gr.Slider(
+                resample_sr0 = gr.Slider(
                     minimum=0,
                     maximum=48000,
                     label=i18n("后处理重采样至最终采样率0为不进行重采样"),
@@ -1154,7 +1172,7 @@ with gr.Blocks() as app:
                         # file_big_npy1,
                         index_rate1,
                         filter_radius0,
-                        resample_sr0
+                        resample_sr0,
                     ],
                     [vc_output1, vc_output2],
                 )
@@ -1174,7 +1192,7 @@ with gr.Blocks() as app:
                     value="pm",
                     interactive=True,
                 )
-                filter_radius1=gr.Slider(
+                filter_radius1 = gr.Slider(
                     minimum=0,
                     maximum=7,
                     label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
@@ -1205,7 +1223,7 @@ with gr.Blocks() as app:
                     value=1,
                     interactive=True,
                 )
-                resample_sr1=gr.Slider(
+                resample_sr1 = gr.Slider(
                     minimum=0,
                     maximum=48000,
                     label=i18n("后处理重采样至最终采样率0为不进行重采样"),
@@ -1237,7 +1255,7 @@ with gr.Blocks() as app:
                         # file_big_npy2,
                         index_rate2,
                         filter_radius1,
-                        resample_sr1
+                        resample_sr1,
                     ],
                     [vc_output3],
                 )
@@ -1335,7 +1353,7 @@ with gr.Blocks() as app:
                     but1 = gr.Button(i18n("处理数据"), variant="primary")
                     info1 = gr.Textbox(label=i18n("输出信息"), value="")
                     but1.click(
-                        preprocess_dataset, [trainset_dir4, exp_dir1, sr2,np7], [info1]
+                        preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1]
                     )
         with gr.Group():
             gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
@@ -1597,16 +1615,16 @@ with gr.Blocks() as app:
             butOnnx = gr.Button(i18n("导出Onnx模型"), variant="primary")
             butOnnx.click(export_onnx, [ckpt_dir, onnx_dir, moevs], infoOnnx)
 
-    tab_faq=i18n("常见问题解答")
+    tab_faq = i18n("常见问题解答")
     with gr.TabItem(tab_faq):
         try:
-            if(tab_faq=="常见问题解答"):
-                with open("docs/faq.md","r",encoding="utf8")as f:info=f.read()
+            if tab_faq == "常见问题解答":
+                with open("docs/faq.md", "r", encoding="utf8") as f:
+                    info = f.read()
             else:
-                with open("docs/faq_en.md", "r")as f:info = f.read()
-            gr.Markdown(
-                value=info
-            )
+                with open("docs/faq_en.md", "r") as f:
+                    info = f.read()
+            gr.Markdown(value=info)
         except:
             gr.Markdown(traceback.format_exc())
 

File 3 of 3

@@ -2,16 +2,18 @@ import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 import scipy.signal as signal
-import pyworld, os, traceback, faiss,librosa
+import pyworld, os, traceback, faiss, librosa
 from scipy import signal
 from functools import lru_cache
 
 bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
 
-input_audio_path2wav={}
+input_audio_path2wav = {}
+
+
 @lru_cache
-def cache_harvest_f0(input_audio_path,fs,f0max,f0min,frame_period):
-    audio=input_audio_path2wav[input_audio_path]
+def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
+    audio = input_audio_path2wav[input_audio_path]
     f0, t = pyworld.harvest(
         audio,
         fs=fs,
@@ -22,6 +24,7 @@ def cache_harvest_f0(input_audio_path,fs,f0max,f0min,frame_period):
     f0 = pyworld.stonemask(audio, f0, t, fs)
     return f0
 
+
 class VC(object):
     def __init__(self, tgt_sr, config):
         self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = (
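
Context for cache_harvest_f0 above: numpy arrays are not hashable, so they cannot serve as @lru_cache keys. The pipeline therefore keys the cache on the (hashable) path string and numeric arguments and fetches the waveform from the module-level input_audio_path2wav dict. A minimal sketch of the same pattern, with cached_stat standing in for the expensive pyworld.harvest call:

from functools import lru_cache

import numpy as np

input_audio_path2wav = {}  # path -> waveform, looked up inside the cached fn

@lru_cache
def cached_stat(input_audio_path, scale):
    # stand-in for pyworld.harvest(); only hashable args reach lru_cache
    audio = input_audio_path2wav[input_audio_path]
    return float(audio.mean()) * scale

input_audio_path2wav["a.wav"] = np.ones(16000, dtype=np.double)
print(cached_stat("a.wav", 2.0))  # computed once
print(cached_stat("a.wav", 2.0))  # served from the cache

Note the trade-off: if the dict entry for a path is overwritten, the cache will not notice, so the path effectively acts as a content key.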
@@ -41,7 +44,16 @@ class VC(object):
         self.t_max = self.sr * self.x_max  # query-free duration threshold
         self.device = config.device
 
-    def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None):
+    def get_f0(
+        self,
+        input_audio_path,
+        x,
+        p_len,
+        f0_up_key,
+        f0_method,
+        filter_radius,
+        inp_f0=None,
+    ):
         global input_audio_path2wav
         time_step = self.window / self.sr * 1000
         f0_min = 50
@@ -65,9 +77,9 @@ class VC(object):
                 f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
             )
         elif f0_method == "harvest":
-            input_audio_path2wav[input_audio_path]=x.astype(np.double)
-            f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10)
-            if(filter_radius>2):
+            input_audio_path2wav[input_audio_path] = x.astype(np.double)
+            f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
+            if filter_radius > 2:
                 f0 = signal.medfilt(f0, 3)
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
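
In the harvest branch above, filter_radius > 2 smooths the pitch track with a 3-tap median filter, and the final line transposes it by f0_up_key semitones. A small standalone sketch:

import numpy as np
from scipy import signal

f0 = np.array([100.0, 101.0, 400.0, 102.0, 103.0])  # one octave-jump outlier
filter_radius = 3
if filter_radius > 2:
    f0 = signal.medfilt(f0, 3)  # 3-tap median removes the isolated spike
print(f0)  # [100. 101. 102. 103. 102.]

f0_up_key = 12  # +12 semitones = one octave; each semitone is a factor of 2**(1/12)
f0 *= pow(2, f0_up_key / 12)
print(f0)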
@@ -255,7 +267,15 @@ class VC(object):
         sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
         pitch, pitchf = None, None
         if if_f0 == 1:
-            pitch, pitchf = self.get_f0(input_audio_path,audio_pad, p_len, f0_up_key, f0_method,filter_radius, inp_f0)
+            pitch, pitchf = self.get_f0(
+                input_audio_path,
+                audio_pad,
+                p_len,
+                f0_up_key,
+                f0_method,
+                filter_radius,
+                inp_f0,
+            )
             pitch = pitch[:p_len]
             pitchf = pitchf[:p_len]
         if self.device == "mps":
@@ -328,11 +348,11 @@ class VC(object):
             )[self.t_pad_tgt : -self.t_pad_tgt]
         )
         audio_opt = np.concatenate(audio_opt)
-        if(resample_sr>=16000 and tgt_sr!=resample_sr):
+        if resample_sr >= 16000 and tgt_sr != resample_sr:
             audio_opt = librosa.resample(
                 audio_opt, orig_sr=tgt_sr, target_sr=resample_sr
             )
-        audio_opt=audio_opt.astype(np.int16)
+        audio_opt = audio_opt.astype(np.int16)
         del pitch, pitchf, sid
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
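
The pipeline tail above optionally resamples and then casts to int16 for wavfile.write. A hedged sketch of the same steps; here the float buffer is scaled into int16 range explicitly, whereas the pipeline's buffer is already at int16 scale by this point:

import numpy as np
import librosa

tgt_sr, resample_sr = 40000, 16000
audio_opt = np.random.uniform(-0.5, 0.5, tgt_sr).astype(np.float32)

# Resample only when a valid target rate differs from the model's rate.
if resample_sr >= 16000 and tgt_sr != resample_sr:
    audio_opt = librosa.resample(audio_opt, orig_sr=tgt_sr, target_sr=resample_sr)
    tgt_sr = resample_sr

# Scale [-1, 1] floats into int16 range before the cast (assumption for
# this sketch; the real pipeline scales earlier in the chain).
audio_opt = (audio_opt * 32767).clip(-32768, 32767).astype(np.int16)
print(tgt_sr, audio_opt.dtype, audio_opt.shape)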