Format code (#275)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
github-actions[bot] 2023-05-14 07:52:36 +00:00 committed by GitHub
parent 32437314b8
commit 6a3eaef090
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 295 additions and 158 deletions

View File

@ -1,4 +1,5 @@
import os, traceback, sys, parselmouth import os, traceback, sys, parselmouth
now_dir = os.getcwd() now_dir = os.getcwd()
sys.path.append(now_dir) sys.path.append(now_dir)
from my_utils import load_audio from my_utils import load_audio
@ -35,7 +36,7 @@ class FeatureInput(object):
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
def compute_f0(self, path, f0_method): def compute_f0(self, path, f0_method):
x=load_audio(path,self.fs) x = load_audio(path, self.fs)
p_len = x.shape[0] // self.hop p_len = x.shape[0] // self.hop
if f0_method == "pm": if f0_method == "pm":
time_step = 160 / 16000 * 1000 time_step = 160 / 16000 * 1000

View File

@ -18,9 +18,12 @@ from fairseq import checkpoint_utils
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():device="cuda" if torch.cuda.is_available():
elif torch.backends.mps.is_available():device="mps" device = "cuda"
else:device="cpu" elif torch.backends.mps.is_available():
device = "mps"
else:
device = "cpu"
f = open("%s/extract_f0_feature.log" % exp_dir, "a+") f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
@ -36,7 +39,9 @@ model_path = "hubert_base.pt"
printt(exp_dir) printt(exp_dir)
wavPath = "%s/1_16k_wavs" % exp_dir wavPath = "%s/1_16k_wavs" % exp_dir
outPath = "%s/3_feature256" % exp_dir if version=="v1"else "%s/3_feature768" % exp_dir outPath = (
"%s/3_feature256" % exp_dir if version == "v1" else "%s/3_feature768" % exp_dir
)
os.makedirs(outPath, exist_ok=True) os.makedirs(outPath, exist_ok=True)
@ -64,7 +69,7 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
model = models[0] model = models[0]
model = model.to(device) model = model.to(device)
printt("move model to %s" % device) printt("move model to %s" % device)
if device not in ["mps","cpu"]: if device not in ["mps", "cpu"]:
model = model.half() model = model.half()
model.eval() model.eval()
@ -90,11 +95,13 @@ else:
if device not in ["mps", "cpu"] if device not in ["mps", "cpu"]
else feats.to(device), else feats.to(device),
"padding_mask": padding_mask.to(device), "padding_mask": padding_mask.to(device),
"output_layer": 9 if version=="v1"else 12, # layer 9 "output_layer": 9 if version == "v1" else 12, # layer 9
} }
with torch.no_grad(): with torch.no_grad():
logits = model.extract_features(**inputs) logits = model.extract_features(**inputs)
feats = model.final_proj(logits[0])if version=="v1"else logits[0] feats = (
model.final_proj(logits[0]) if version == "v1" else logits[0]
)
feats = feats.squeeze(0).float().cpu().numpy() feats = feats.squeeze(0).float().cpu().numpy()
if np.isnan(feats).sum() == 0: if np.isnan(feats).sum() == 0:

View File

@ -1,5 +1,6 @@
import torch, os, traceback, sys, warnings, shutil, numpy as np import torch, os, traceback, sys, warnings, shutil, numpy as np
os.environ["no_proxy"]="localhost, 127.0.0.1, ::1"
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
from multiprocessing import cpu_count from multiprocessing import cpu_count
import threading import threading
from time import sleep from time import sleep
@ -11,8 +12,8 @@ now_dir = os.getcwd()
sys.path.append(now_dir) sys.path.append(now_dir)
tmp = os.path.join(now_dir, "TEMP") tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True) shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack"%(now_dir), ignore_errors=True) shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack"%(now_dir) , ignore_errors=True) shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
os.makedirs(tmp, exist_ok=True) os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
@ -70,7 +71,12 @@ else:
gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
default_batch_size = 1 default_batch_size = 1
gpus = "-".join([i[0] for i in gpu_infos]) gpus = "-".join([i[0] for i in gpu_infos])
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono,SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono from infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono,
)
from scipy.io import wavfile from scipy.io import wavfile
from fairseq import checkpoint_utils from fairseq import checkpoint_utils
import gradio as gr import gradio as gr
@ -121,11 +127,11 @@ names = []
for name in os.listdir(weight_root): for name in os.listdir(weight_root):
if name.endswith(".pth"): if name.endswith(".pth"):
names.append(name) names.append(name)
index_paths=[] index_paths = []
for root, dirs, files in os.walk(index_root, topdown=False): for root, dirs, files in os.walk(index_root, topdown=False):
for name in files: for name in files:
if name.endswith(".index") and "trained" not in name: if name.endswith(".index") and "trained" not in name:
index_paths.append("%s/%s"%(root,name)) index_paths.append("%s/%s" % (root, name))
uvr5_names = [] uvr5_names = []
for name in os.listdir(weight_uvr5_root): for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth"): if name.endswith(".pth"):
@ -144,29 +150,33 @@ def vc_single(
index_rate, index_rate,
filter_radius, filter_radius,
resample_sr, resample_sr,
rms_mix_rate rms_mix_rate,
): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
global tgt_sr, net_g, vc, hubert_model,version global tgt_sr, net_g, vc, hubert_model, version
if input_audio_path is None: if input_audio_path is None:
return "You need to upload an audio", None return "You need to upload an audio", None
f0_up_key = int(f0_up_key) f0_up_key = int(f0_up_key)
try: try:
audio = load_audio(input_audio_path, 16000) audio = load_audio(input_audio_path, 16000)
audio_max=np.abs(audio).max()/0.95 audio_max = np.abs(audio).max() / 0.95
if(audio_max>1): if audio_max > 1:
audio/=audio_max audio /= audio_max
times = [0, 0, 0] times = [0, 0, 0]
if hubert_model == None: if hubert_model == None:
load_hubert() load_hubert()
if_f0 = cpt.get("f0", 1) if_f0 = cpt.get("f0", 1)
file_index = ( file_index = (
file_index.strip(" ") (
.strip('"') file_index.strip(" ")
.strip("\n") .strip('"')
.strip('"') .strip("\n")
.strip(" ") .strip('"')
.replace("trained", "added") .strip(" ")
)if file_index!=""else file_index2 # 防止小白写错,自动帮他替换掉 .replace("trained", "added")
)
if file_index != ""
else file_index2
) # 防止小白写错,自动帮他替换掉
# file_big_npy = ( # file_big_npy = (
# file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
# ) # )
@ -190,10 +200,19 @@ def vc_single(
version, version,
f0_file=f0_file, f0_file=f0_file,
) )
if(resample_sr>=16000 and tgt_sr!=resample_sr): if resample_sr >= 16000 and tgt_sr != resample_sr:
tgt_sr=resample_sr tgt_sr = resample_sr
index_info="Using index:%s."%file_index if os.path.exists(file_index)else"Index not used." index_info = (
return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss"%(index_info,times[0],times[1],times[2]), (tgt_sr, audio_opt) "Using index:%s." % file_index
if os.path.exists(file_index)
else "Index not used."
)
return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
index_info,
times[0],
times[1],
times[2],
), (tgt_sr, audio_opt)
except: except:
info = traceback.format_exc() info = traceback.format_exc()
print(info) print(info)
@ -213,7 +232,7 @@ def vc_multi(
index_rate, index_rate,
filter_radius, filter_radius,
resample_sr, resample_sr,
rms_mix_rate rms_mix_rate,
): ):
try: try:
dir_path = ( dir_path = (
@ -243,9 +262,9 @@ def vc_multi(
index_rate, index_rate,
filter_radius, filter_radius,
resample_sr, resample_sr,
rms_mix_rate rms_mix_rate,
) )
if "Success"in info: if "Success" in info:
try: try:
tgt_sr, audio_opt = opt tgt_sr, audio_opt = opt
wavfile.write( wavfile.write(
@ -330,8 +349,8 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
# 一个选项卡全局只能有一个音色 # 一个选项卡全局只能有一个音色
def get_vc(sid): def get_vc(sid):
global n_spk, tgt_sr, net_g, vc, cpt,version global n_spk, tgt_sr, net_g, vc, cpt, version
if sid == ""or sid==[]: if sid == "" or sid == []:
global hubert_model global hubert_model
if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
print("clean_empty_cache") print("clean_empty_cache")
@ -342,14 +361,18 @@ def get_vc(sid):
###楼下不这么折腾清理不干净 ###楼下不这么折腾清理不干净
if_f0 = cpt.get("f0", 1) if_f0 = cpt.get("f0", 1)
version = cpt.get("version", "v1") version = cpt.get("version", "v1")
if (version == "v1"): if version == "v1":
if if_f0 == 1: if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) net_g = SynthesizerTrnMs256NSFsid(
*cpt["config"], is_half=config.is_half
)
else: else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif (version == "v2"): elif version == "v2":
if if_f0 == 1: if if_f0 == 1:
net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) net_g = SynthesizerTrnMs768NSFsid(
*cpt["config"], is_half=config.is_half
)
else: else:
net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
del net_g, cpt del net_g, cpt
@ -364,12 +387,12 @@ def get_vc(sid):
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
if_f0 = cpt.get("f0", 1) if_f0 = cpt.get("f0", 1)
version = cpt.get("version", "v1") version = cpt.get("version", "v1")
if(version=="v1"): if version == "v1":
if if_f0 == 1: if if_f0 == 1:
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
else: else:
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif(version=="v2"): elif version == "v2":
if if_f0 == 1: if if_f0 == 1:
net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
else: else:
@ -391,17 +414,21 @@ def change_choices():
for name in os.listdir(weight_root): for name in os.listdir(weight_root):
if name.endswith(".pth"): if name.endswith(".pth"):
names.append(name) names.append(name)
index_paths=[] index_paths = []
for root, dirs, files in os.walk(index_root, topdown=False): for root, dirs, files in os.walk(index_root, topdown=False):
for name in files: for name in files:
if name.endswith(".index") and "trained" not in name: if name.endswith(".index") and "trained" not in name:
index_paths.append("%s/%s" % (root, name)) index_paths.append("%s/%s" % (root, name))
return {"choices": sorted(names), "__type__": "update"},{"choices": sorted(index_paths), "__type__": "update"} return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths),
"__type__": "update",
}
def clean(): def clean():
return {"value": "", "__type__": "update"} return {"value": "", "__type__": "update"}
sr_dict = { sr_dict = {
"32k": 32000, "32k": 32000,
"40k": 40000, "40k": 40000,
@ -468,7 +495,7 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir,version19): def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19):
gpus = gpus.split("-") gpus = gpus.split("-")
os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
@ -514,14 +541,18 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir,version19):
leng = len(gpus) leng = len(gpus)
ps = [] ps = []
for idx, n_g in enumerate(gpus): for idx, n_g in enumerate(gpus):
cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s %s" % ( cmd = (
config.device, config.python_cmd
leng, + " extract_feature_print.py %s %s %s %s %s/logs/%s %s"
idx, % (
n_g, config.device,
now_dir, leng,
exp_dir, idx,
version19, n_g,
now_dir,
exp_dir,
version19,
)
) )
print(cmd) print(cmd)
p = Popen( p = Popen(
@ -549,34 +580,47 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir,version19):
yield log yield log
def change_sr2(sr2, if_f0_3,version19): def change_sr2(sr2, if_f0_3, version19):
vis_v=True if sr2=="40k"else False vis_v = True if sr2 == "40k" else False
if(sr2!="40k"):version19="v1" if sr2 != "40k":
path_str=""if version19=="v1"else "_v2" version19 = "v1"
version_state={"visible": vis_v, "__type__": "update"} path_str = "" if version19 == "v1" else "_v2"
if(vis_v==False):version_state["value"]="v1" version_state = {"visible": vis_v, "__type__": "update"}
f0_str="f0"if if_f0_3 else"" if vis_v == False:
return "pretrained%s/%sG%s.pth" % (path_str,f0_str,sr2), "pretrained%s/%sD%s.pth" % (path_str,f0_str,sr2),version_state version_state["value"] = "v1"
f0_str = "f0" if if_f0_3 else ""
return (
"pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
"pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
version_state,
)
def change_version19(sr2,if_f0_3,version19):
path_str=""if version19=="v1"else "_v2"
f0_str="f0"if if_f0_3 else""
return "pretrained%s/%sG%s.pth" % (path_str,f0_str,sr2), "pretrained%s/%sD%s.pth" % (path_str,f0_str,sr2)
def change_f0(if_f0_3, sr2,version19): # f0method8,pretrained_G14,pretrained_D15 def change_version19(sr2, if_f0_3, version19):
path_str=""if version19=="v1"else "_v2" path_str = "" if version19 == "v1" else "_v2"
f0_str = "f0" if if_f0_3 else ""
return "pretrained%s/%sG%s.pth" % (
path_str,
f0_str,
sr2,
), "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15
path_str = "" if version19 == "v1" else "_v2"
if if_f0_3: if if_f0_3:
return ( return (
{"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"},
"pretrained%s/f0G%s.pth" % (path_str,sr2), "pretrained%s/f0G%s.pth" % (path_str, sr2),
"pretrained%s/f0D%s.pth" % (path_str,sr2), "pretrained%s/f0D%s.pth" % (path_str, sr2),
) )
return ( return (
{"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"},
"pretrained%s/G%s.pth" % (path_str,sr2), "pretrained%s/G%s.pth" % (path_str, sr2),
"pretrained%s/D%s.pth" % (path_str,sr2), "pretrained%s/D%s.pth" % (path_str, sr2),
) )
# but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16]) # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
def click_train( def click_train(
exp_dir1, exp_dir1,
@ -598,7 +642,11 @@ def click_train(
exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
os.makedirs(exp_dir, exist_ok=True) os.makedirs(exp_dir, exist_ok=True)
gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir) gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
feature_dir = "%s/3_feature256" % (exp_dir)if version19=="v1"else "%s/3_feature768" % (exp_dir) feature_dir = (
"%s/3_feature256" % (exp_dir)
if version19 == "v1"
else "%s/3_feature768" % (exp_dir)
)
if if_f0_3: if if_f0_3:
f0_dir = "%s/2a_f0" % (exp_dir) f0_dir = "%s/2a_f0" % (exp_dir)
f0nsf_dir = "%s/2b-f0nsf" % (exp_dir) f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
@ -640,18 +688,18 @@ def click_train(
spk_id5, spk_id5,
) )
) )
fea_dim = 256 if version19 == "v1"else 768 fea_dim = 256 if version19 == "v1" else 768
if if_f0_3: if if_f0_3:
for _ in range(2): for _ in range(2):
opt.append( opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, now_dir, now_dir, spk_id5) % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
) )
else: else:
for _ in range(2): for _ in range(2):
opt.append( opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, spk_id5) % (now_dir, sr2, now_dir, fea_dim, spk_id5)
) )
shuffle(opt) shuffle(opt)
with open("%s/filelist.txt" % exp_dir, "w") as f: with open("%s/filelist.txt" % exp_dir, "w") as f:
@ -706,10 +754,14 @@ def click_train(
# but4.click(train_index, [exp_dir1], info3) # but4.click(train_index, [exp_dir1], info3)
def train_index(exp_dir1,version19): def train_index(exp_dir1, version19):
exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
os.makedirs(exp_dir, exist_ok=True) os.makedirs(exp_dir, exist_ok=True)
feature_dir = "%s/3_feature256" % (exp_dir)if version19=="v1"else "%s/3_feature768" % (exp_dir) feature_dir = (
"%s/3_feature256" % (exp_dir)
if version19 == "v1"
else "%s/3_feature768" % (exp_dir)
)
if os.path.exists(feature_dir) == False: if os.path.exists(feature_dir) == False:
return "请先进行特征提取!" return "请先进行特征提取!"
listdir_res = list(os.listdir(feature_dir)) listdir_res = list(os.listdir(feature_dir))
@ -729,7 +781,7 @@ def train_index(exp_dir1,version19):
infos = [] infos = []
infos.append("%s,%s" % (big_npy.shape, n_ivf)) infos.append("%s,%s" % (big_npy.shape, n_ivf))
yield "\n".join(infos) yield "\n".join(infos)
index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,Flat" % n_ivf) index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
# index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf) # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
infos.append("training") infos.append("training")
yield "\n".join(infos) yield "\n".join(infos)
@ -738,7 +790,8 @@ def train_index(exp_dir1,version19):
index.train(big_npy) index.train(big_npy)
faiss.write_index( faiss.write_index(
index, index,
"%s/trained_IVF%s_Flat_nprobe_%s_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe,version19), "%s/trained_IVF%s_Flat_nprobe_%s_%s.index"
% (exp_dir, n_ivf, index_ivf.nprobe, version19),
) )
# faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
infos.append("adding") infos.append("adding")
@ -750,7 +803,10 @@ def train_index(exp_dir1,version19):
index, index,
"%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe), "%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
) )
infos.append("成功构建索引added_IVF%s_Flat_nprobe_%s_%s.index" % (n_ivf, index_ivf.nprobe,version19)) infos.append(
"成功构建索引added_IVF%s_Flat_nprobe_%s_%s.index"
% (n_ivf, index_ivf.nprobe, version19)
)
# faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
# infos.append("成功构建索引added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19)) # infos.append("成功构建索引added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
yield "\n".join(infos) yield "\n".join(infos)
@ -786,7 +842,11 @@ def train1key(
preprocess_log_path = "%s/preprocess.log" % model_log_dir preprocess_log_path = "%s/preprocess.log" % model_log_dir
extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
feature_dir = "%s/3_feature256" % model_log_dir if version19=="v1"else "%s/3_feature768" % model_log_dir feature_dir = (
"%s/3_feature256" % model_log_dir
if version19 == "v1"
else "%s/3_feature768" % model_log_dir
)
os.makedirs(model_log_dir, exist_ok=True) os.makedirs(model_log_dir, exist_ok=True)
#########step1:处理数据 #########step1:处理数据
@ -830,7 +890,8 @@ def train1key(
leng, leng,
idx, idx,
n_g, n_g,
model_log_dir,version19, model_log_dir,
version19,
) )
yield get_info_str(cmd) yield get_info_str(cmd)
p = Popen( p = Popen(
@ -885,18 +946,18 @@ def train1key(
spk_id5, spk_id5,
) )
) )
fea_dim=256 if version19=="v1"else 768 fea_dim = 256 if version19 == "v1" else 768
if if_f0_3: if if_f0_3:
for _ in range(2): for _ in range(2):
opt.append( opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, now_dir, now_dir, spk_id5) % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
) )
else: else:
for _ in range(2): for _ in range(2):
opt.append( opt.append(
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
% (now_dir, sr2, now_dir,fea_dim, spk_id5) % (now_dir, sr2, now_dir, fea_dim, spk_id5)
) )
shuffle(opt) shuffle(opt)
with open("%s/filelist.txt" % model_log_dir, "w") as f: with open("%s/filelist.txt" % model_log_dir, "w") as f:
@ -961,7 +1022,7 @@ def train1key(
# n_ivf = big_npy.shape[0] // 39 # n_ivf = big_npy.shape[0] // 39
n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
yield get_info_str("%s,%s" % (big_npy.shape, n_ivf)) yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
index = faiss.index_factory(256 if version19=="v1"else 768, "IVF%s,Flat" % n_ivf) index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
yield get_info_str("training index") yield get_info_str("training index")
index_ivf = faiss.extract_index_ivf(index) # index_ivf = faiss.extract_index_ivf(index) #
index_ivf.nprobe = 1 index_ivf.nprobe = 1
@ -969,7 +1030,7 @@ def train1key(
faiss.write_index( faiss.write_index(
index, index,
"%s/trained_IVF%s_Flat_nprobe_%s_%s.index" "%s/trained_IVF%s_Flat_nprobe_%s_%s.index"
% (model_log_dir, n_ivf, index_ivf.nprobe,version19), % (model_log_dir, n_ivf, index_ivf.nprobe, version19),
) )
yield get_info_str("adding index") yield get_info_str("adding index")
batch_size_add = 8192 batch_size_add = 8192
@ -978,10 +1039,11 @@ def train1key(
faiss.write_index( faiss.write_index(
index, index,
"%s/added_IVF%s_Flat_nprobe_%s_%s.index" "%s/added_IVF%s_Flat_nprobe_%s_%s.index"
% (model_log_dir, n_ivf, index_ivf.nprobe,version19), % (model_log_dir, n_ivf, index_ivf.nprobe, version19),
) )
yield get_info_str( yield get_info_str(
"成功构建索引, added_IVF%s_Flat_nprobe_%s_%s.index" % (n_ivf, index_ivf.nprobe,version19) "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s.index"
% (n_ivf, index_ivf.nprobe, version19)
) )
yield get_info_str(i18n("全流程结束!")) yield get_info_str(i18n("全流程结束!"))
@ -999,8 +1061,8 @@ def change_info_(ckpt_path):
) as f: ) as f:
info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1]) info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
sr, f0 = info["sample_rate"], info["if_f0"] sr, f0 = info["sample_rate"], info["if_f0"]
version="v2"if("version"in info and info["version"]=="v2")else"v1" version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
return sr, str(f0),version return sr, str(f0), version
except: except:
traceback.print_exc() traceback.print_exc()
return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
@ -1136,7 +1198,7 @@ with gr.Blocks() as app:
value="pm", value="pm",
interactive=True, interactive=True,
) )
filter_radius0=gr.Slider( filter_radius0 = gr.Slider(
minimum=0, minimum=0,
maximum=7, maximum=7,
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"), label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
@ -1155,7 +1217,9 @@ with gr.Blocks() as app:
choices=sorted(index_paths), choices=sorted(index_paths),
interactive=True, interactive=True,
) )
refresh_button.click(fn=change_choices, inputs=[], outputs=[sid0, file_index2]) refresh_button.click(
fn=change_choices, inputs=[], outputs=[sid0, file_index2]
)
# file_big_npy1 = gr.Textbox( # file_big_npy1 = gr.Textbox(
# label=i18n("特征文件路径"), # label=i18n("特征文件路径"),
# value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
@ -1168,7 +1232,7 @@ with gr.Blocks() as app:
value=0.76, value=0.76,
interactive=True, interactive=True,
) )
resample_sr0=gr.Slider( resample_sr0 = gr.Slider(
minimum=0, minimum=0,
maximum=48000, maximum=48000,
label=i18n("后处理重采样至最终采样率0为不进行重采样"), label=i18n("后处理重采样至最终采样率0为不进行重采样"),
@ -1202,7 +1266,7 @@ with gr.Blocks() as app:
index_rate1, index_rate1,
filter_radius0, filter_radius0,
resample_sr0, resample_sr0,
rms_mix_rate0 rms_mix_rate0,
], ],
[vc_output1, vc_output2], [vc_output1, vc_output2],
) )
@ -1222,7 +1286,7 @@ with gr.Blocks() as app:
value="pm", value="pm",
interactive=True, interactive=True,
) )
filter_radius1=gr.Slider( filter_radius1 = gr.Slider(
minimum=0, minimum=0,
maximum=7, maximum=7,
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"), label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音"),
@ -1253,7 +1317,7 @@ with gr.Blocks() as app:
value=1, value=1,
interactive=True, interactive=True,
) )
resample_sr1=gr.Slider( resample_sr1 = gr.Slider(
minimum=0, minimum=0,
maximum=48000, maximum=48000,
label=i18n("后处理重采样至最终采样率0为不进行重采样"), label=i18n("后处理重采样至最终采样率0为不进行重采样"),
@ -1293,7 +1357,7 @@ with gr.Blocks() as app:
index_rate2, index_rate2,
filter_radius1, filter_radius1,
resample_sr1, resample_sr1,
rms_mix_rate1 rms_mix_rate1,
], ],
[vc_output3], [vc_output3],
) )
@ -1398,7 +1462,7 @@ with gr.Blocks() as app:
but1 = gr.Button(i18n("处理数据"), variant="primary") but1 = gr.Button(i18n("处理数据"), variant="primary")
info1 = gr.Textbox(label=i18n("输出信息"), value="") info1 = gr.Textbox(label=i18n("输出信息"), value="")
but1.click( but1.click(
preprocess_dataset, [trainset_dir4, exp_dir1, sr2,np7], [info1] preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1]
) )
with gr.Group(): with gr.Group():
gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)")) gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
@ -1423,7 +1487,7 @@ with gr.Blocks() as app:
info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
but2.click( but2.click(
extract_f0_feature, extract_f0_feature,
[gpus6, np7, f0method8, if_f0_3, exp_dir1,version19], [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19],
[info2], [info2],
) )
with gr.Group(): with gr.Group():
@ -1468,9 +1532,7 @@ with gr.Blocks() as app:
interactive=True, interactive=True,
) )
if_save_every_weights18 = gr.Radio( if_save_every_weights18 = gr.Radio(
label=i18n( label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
"是否在每次保存时间点将最终小模型保存至weights文件夹"
),
choices=[i18n(""), i18n("")], choices=[i18n(""), i18n("")],
value=i18n(""), value=i18n(""),
interactive=True, interactive=True,
@ -1487,14 +1549,18 @@ with gr.Blocks() as app:
interactive=True, interactive=True,
) )
sr2.change( sr2.change(
change_sr2, [sr2, if_f0_3,version19], [pretrained_G14, pretrained_D15,version19] change_sr2,
[sr2, if_f0_3, version19],
[pretrained_G14, pretrained_D15, version19],
) )
version19.change( version19.change(
change_version19, [sr2, if_f0_3,version19], [pretrained_G14, pretrained_D15] change_version19,
[sr2, if_f0_3, version19],
[pretrained_G14, pretrained_D15],
) )
if_f0_3.change( if_f0_3.change(
change_f0, change_f0,
[if_f0_3, sr2,version19], [if_f0_3, sr2, version19],
[f0method8, pretrained_G14, pretrained_D15], [f0method8, pretrained_G14, pretrained_D15],
) )
gpus16 = gr.Textbox( gpus16 = gr.Textbox(
@ -1526,7 +1592,7 @@ with gr.Blocks() as app:
], ],
info3, info3,
) )
but4.click(train_index, [exp_dir1,version19], info3) but4.click(train_index, [exp_dir1, version19], info3)
but5.click( but5.click(
train1key, train1key,
[ [
@ -1586,7 +1652,7 @@ with gr.Blocks() as app:
max_lines=1, max_lines=1,
interactive=True, interactive=True,
) )
version_2=gr.Radio( version_2 = gr.Radio(
label=i18n("模型版本型号"), label=i18n("模型版本型号"),
choices=["v1", "v2"], choices=["v1", "v2"],
value="v1", value="v1",
@ -1597,7 +1663,16 @@ with gr.Blocks() as app:
info4 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) info4 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
but6.click( but6.click(
merge, merge,
[ckpt_a, ckpt_b, alpha_a, sr_, if_f0_, info__, name_to_save0,version_2], [
ckpt_a,
ckpt_b,
alpha_a,
sr_,
if_f0_,
info__,
name_to_save0,
version_2,
],
info4, info4,
) # def merge(path1,path2,alpha1,sr,f0,info): ) # def merge(path1,path2,alpha1,sr,f0,info):
with gr.Group(): with gr.Group():
@ -1655,7 +1730,7 @@ with gr.Blocks() as app:
value="1", value="1",
interactive=True, interactive=True,
) )
version_1=gr.Radio( version_1 = gr.Radio(
label=i18n("模型版本型号"), label=i18n("模型版本型号"),
choices=["v1", "v2"], choices=["v1", "v2"],
value="v1", value="v1",
@ -1666,10 +1741,12 @@ with gr.Blocks() as app:
) )
but9 = gr.Button(i18n("提取"), variant="primary") but9 = gr.Button(i18n("提取"), variant="primary")
info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
ckpt_path2.change(change_info_, [ckpt_path2], [sr__, if_f0__,version_1]) ckpt_path2.change(
change_info_, [ckpt_path2], [sr__, if_f0__, version_1]
)
but9.click( but9.click(
extract_small_model, extract_small_model,
[ckpt_path2, save_name, sr__, if_f0__, info___,version_1], [ckpt_path2, save_name, sr__, if_f0__, info___, version_1],
info7, info7,
) )
@ -1687,16 +1764,16 @@ with gr.Blocks() as app:
butOnnx = gr.Button(i18n("导出Onnx模型"), variant="primary") butOnnx = gr.Button(i18n("导出Onnx模型"), variant="primary")
butOnnx.click(export_onnx, [ckpt_dir, onnx_dir, moevs], infoOnnx) butOnnx.click(export_onnx, [ckpt_dir, onnx_dir, moevs], infoOnnx)
tab_faq=i18n("常见问题解答") tab_faq = i18n("常见问题解答")
with gr.TabItem(tab_faq): with gr.TabItem(tab_faq):
try: try:
if(tab_faq=="常见问题解答"): if tab_faq == "常见问题解答":
with open("docs/faq.md","r",encoding="utf8")as f:info=f.read() with open("docs/faq.md", "r", encoding="utf8") as f:
info = f.read()
else: else:
with open("docs/faq_en.md", "r")as f:info = f.read() with open("docs/faq_en.md", "r") as f:
gr.Markdown( info = f.read()
value=info gr.Markdown(value=info)
)
except: except:
gr.Markdown(traceback.format_exc()) gr.Markdown(traceback.format_exc())

View File

@ -59,6 +59,8 @@ class TextEncoder256(nn.Module):
m, logs = torch.split(stats, self.out_channels, dim=1) m, logs = torch.split(stats, self.out_channels, dim=1)
return m, logs, x_mask return m, logs, x_mask
class TextEncoder768(nn.Module): class TextEncoder768(nn.Module):
def __init__( def __init__(
self, self,
@ -105,6 +107,7 @@ class TextEncoder768(nn.Module):
m, logs = torch.split(stats, self.out_channels, dim=1) m, logs = torch.split(stats, self.out_channels, dim=1)
return m, logs, x_mask return m, logs, x_mask
class ResidualCouplingBlock(nn.Module): class ResidualCouplingBlock(nn.Module):
def __init__( def __init__(
self, self,
@ -635,6 +638,8 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
z = self.flow(z_p, x_mask, g=g, reverse=True) z = self.flow(z_p, x_mask, g=g, reverse=True)
o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g) o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
return o, x_mask, (z, z_p, m_p, logs_p) return o, x_mask, (z, z_p, m_p, logs_p)
class SynthesizerTrnMs768NSFsid(nn.Module): class SynthesizerTrnMs768NSFsid(nn.Module):
def __init__( def __init__(
self, self,
@ -846,6 +851,8 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
z = self.flow(z_p, x_mask, g=g, reverse=True) z = self.flow(z_p, x_mask, g=g, reverse=True)
o = self.dec((z * x_mask)[:, :, :max_len], g=g) o = self.dec((z * x_mask)[:, :, :max_len], g=g)
return o, x_mask, (z, z_p, m_p, logs_p) return o, x_mask, (z, z_p, m_p, logs_p)
class SynthesizerTrnMs768NSFsid_nono(nn.Module): class SynthesizerTrnMs768NSFsid_nono(nn.Module):
def __init__( def __init__(
self, self,
@ -977,11 +984,12 @@ class MultiPeriodDiscriminator(torch.nn.Module):
return y_d_rs, y_d_gs, fmap_rs, fmap_gs return y_d_rs, y_d_gs, fmap_rs, fmap_gs
class MultiPeriodDiscriminatorV2(torch.nn.Module): class MultiPeriodDiscriminatorV2(torch.nn.Module):
def __init__(self, use_spectral_norm=False): def __init__(self, use_spectral_norm=False):
super(MultiPeriodDiscriminatorV2, self).__init__() super(MultiPeriodDiscriminatorV2, self).__init__()
# periods = [2, 3, 5, 7, 11, 17] # periods = [2, 3, 5, 7, 11, 17]
periods = [2,3, 5, 7, 11, 17, 23, 37] periods = [2, 3, 5, 7, 11, 17, 23, 37]
discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)] discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
discs = discs + [ discs = discs + [

View File

@ -2,7 +2,7 @@ import torch, traceback, os, pdb
from collections import OrderedDict from collections import OrderedDict
def savee(ckpt, sr, if_f0, name, epoch,version): def savee(ckpt, sr, if_f0, name, epoch, version):
try: try:
opt = OrderedDict() opt = OrderedDict()
opt["weight"] = {} opt["weight"] = {}
@ -96,7 +96,7 @@ def show_info(path):
return traceback.format_exc() return traceback.format_exc()
def extract_small_model(path, name, sr, if_f0, info,version): def extract_small_model(path, name, sr, if_f0, info, version):
try: try:
ckpt = torch.load(path, map_location="cpu") ckpt = torch.load(path, map_location="cpu")
if "model" in ckpt: if "model" in ckpt:
@ -194,7 +194,7 @@ def change_info(path, info, name):
return traceback.format_exc() return traceback.format_exc()
def merge(path1, path2, alpha1, sr, f0, info, name,version): def merge(path1, path2, alpha1, sr, f0, info, name, version):
try: try:
def extract(ckpt): def extract(ckpt):

View File

@ -322,7 +322,11 @@ def get_hparams(init=True):
"-sr", "--sample_rate", type=str, required=True, help="sample rate, 32k/40k/48k" "-sr", "--sample_rate", type=str, required=True, help="sample rate, 32k/40k/48k"
) )
parser.add_argument( parser.add_argument(
"-sw", "--save_every_weights", type=str, default="0", help="save the extracted model in weights directory when saving checkpoints" "-sw",
"--save_every_weights",
type=str,
default="0",
help="save the extracted model in weights directory when saving checkpoints",
) )
parser.add_argument( parser.add_argument(
"-v", "--version", type=str, required=True, help="model version" "-v", "--version", type=str, required=True, help="model version"

View File

@ -31,7 +31,8 @@ from data_utils import (
TextAudioCollate, TextAudioCollate,
DistributedBucketSampler, DistributedBucketSampler,
) )
if(hps.version=="v1"):
if hps.version == "v1":
from infer_pack.models import ( from infer_pack.models import (
SynthesizerTrnMs256NSFsid as RVC_Model_f0, SynthesizerTrnMs256NSFsid as RVC_Model_f0,
SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0, SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0,
@ -519,14 +520,25 @@ def train_and_evaluate(
epoch, epoch,
os.path.join(hps.model_dir, "D_{}.pth".format(2333333)), os.path.join(hps.model_dir, "D_{}.pth".format(2333333)),
) )
if(rank==0 and hps.save_every_weights=="1"): if rank == 0 and hps.save_every_weights == "1":
if hasattr(net_g, "module"): if hasattr(net_g, "module"):
ckpt = net_g.module.state_dict() ckpt = net_g.module.state_dict()
else: else:
ckpt = net_g.state_dict() ckpt = net_g.state_dict()
logger.info( logger.info(
"saving ckpt %s_e%s:%s" "saving ckpt %s_e%s:%s"
% (hps.name,epoch,savee(ckpt, hps.sample_rate, hps.if_f0, hps.name+"_e%s"%epoch, epoch,hps.version)) % (
hps.name,
epoch,
savee(
ckpt,
hps.sample_rate,
hps.if_f0,
hps.name + "_e%s" % epoch,
epoch,
hps.version,
),
)
) )
if rank == 0: if rank == 0:
@ -540,7 +552,7 @@ def train_and_evaluate(
ckpt = net_g.state_dict() ckpt = net_g.state_dict()
logger.info( logger.info(
"saving final ckpt:%s" "saving final ckpt:%s"
% (savee(ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch,hps.version)) % (savee(ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version))
) )
sleep(1) sleep(1)
os._exit(2333333) os._exit(2333333)

View File

@ -2,16 +2,18 @@ import numpy as np, parselmouth, torch, pdb
from time import time as ttime from time import time as ttime
import torch.nn.functional as F import torch.nn.functional as F
import scipy.signal as signal import scipy.signal as signal
import pyworld, os, traceback, faiss,librosa import pyworld, os, traceback, faiss, librosa
from scipy import signal from scipy import signal
from functools import lru_cache from functools import lru_cache
bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000) bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
input_audio_path2wav={} input_audio_path2wav = {}
@lru_cache @lru_cache
def cache_harvest_f0(input_audio_path,fs,f0max,f0min,frame_period): def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
audio=input_audio_path2wav[input_audio_path] audio = input_audio_path2wav[input_audio_path]
f0, t = pyworld.harvest( f0, t = pyworld.harvest(
audio, audio,
fs=fs, fs=fs,
@ -22,18 +24,29 @@ def cache_harvest_f0(input_audio_path,fs,f0max,f0min,frame_period):
f0 = pyworld.stonemask(audio, f0, t, fs) f0 = pyworld.stonemask(audio, f0, t, fs)
return f0 return f0
def change_rms(data1,sr1,data2,sr2,rate):#1是输入音频2是输出音频,rate是2的占比
def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频2是输出音频,rate是2的占比
# print(data1.max(),data2.max()) # print(data1.max(),data2.max())
rms1 = librosa.feature.rms(y=data1, frame_length=sr1//2*2, hop_length=sr1//2)#每半秒一个点 rms1 = librosa.feature.rms(
rms2 = librosa.feature.rms(y=data2, frame_length=sr2//2*2, hop_length=sr2//2) y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2
rms1=torch.from_numpy(rms1) ) # 每半秒一个点
rms1=F.interpolate(rms1.unsqueeze(0), size=data2.shape[0],mode='linear').squeeze() rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2)
rms2=torch.from_numpy(rms2) rms1 = torch.from_numpy(rms1)
rms2=F.interpolate(rms2.unsqueeze(0), size=data2.shape[0],mode='linear').squeeze() rms1 = F.interpolate(
rms2=torch.max(rms2,torch.zeros_like(rms2)+1e-6) rms1.unsqueeze(0), size=data2.shape[0], mode="linear"
data2*=(torch.pow(rms1,torch.tensor(1-rate))*torch.pow(rms2,torch.tensor(rate-1))).numpy() ).squeeze()
rms2 = torch.from_numpy(rms2)
rms2 = F.interpolate(
rms2.unsqueeze(0), size=data2.shape[0], mode="linear"
).squeeze()
rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6)
data2 *= (
torch.pow(rms1, torch.tensor(1 - rate))
* torch.pow(rms2, torch.tensor(rate - 1))
).numpy()
return data2 return data2
class VC(object): class VC(object):
def __init__(self, tgt_sr, config): def __init__(self, tgt_sr, config):
self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = ( self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = (
@ -53,7 +66,16 @@ class VC(object):
self.t_max = self.sr * self.x_max # 免查询时长阈值 self.t_max = self.sr * self.x_max # 免查询时长阈值
self.device = config.device self.device = config.device
def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None): def get_f0(
self,
input_audio_path,
x,
p_len,
f0_up_key,
f0_method,
filter_radius,
inp_f0=None,
):
global input_audio_path2wav global input_audio_path2wav
time_step = self.window / self.sr * 1000 time_step = self.window / self.sr * 1000
f0_min = 50 f0_min = 50
@ -77,9 +99,9 @@ class VC(object):
f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
) )
elif f0_method == "harvest": elif f0_method == "harvest":
input_audio_path2wav[input_audio_path]=x.astype(np.double) input_audio_path2wav[input_audio_path] = x.astype(np.double)
f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10) f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
if(filter_radius>2): if filter_radius > 2:
f0 = signal.medfilt(f0, 3) f0 = signal.medfilt(f0, 3)
f0 *= pow(2, f0_up_key / 12) f0 *= pow(2, f0_up_key / 12)
# with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
@ -134,12 +156,12 @@ class VC(object):
inputs = { inputs = {
"source": feats.to(self.device), "source": feats.to(self.device),
"padding_mask": padding_mask, "padding_mask": padding_mask,
"output_layer": 9if version=="v1"else 12, "output_layer": 9 if version == "v1" else 12,
} }
t0 = ttime() t0 = ttime()
with torch.no_grad(): with torch.no_grad():
logits = model.extract_features(**inputs) logits = model.extract_features(**inputs)
feats = model.final_proj(logits[0])if version=="v1"else logits[0] feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
if ( if (
isinstance(index, type(None)) == False isinstance(index, type(None)) == False
@ -184,10 +206,7 @@ class VC(object):
) )
else: else:
audio1 = ( audio1 = (
(net_g.infer(feats, p_len, sid)[0][0, 0]) (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy()
.data.cpu()
.float()
.numpy()
) )
del feats, p_len, padding_mask del feats, p_len, padding_mask
if torch.cuda.is_available(): if torch.cuda.is_available():
@ -270,7 +289,15 @@ class VC(object):
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long() sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
pitch, pitchf = None, None pitch, pitchf = None, None
if if_f0 == 1: if if_f0 == 1:
pitch, pitchf = self.get_f0(input_audio_path,audio_pad, p_len, f0_up_key, f0_method,filter_radius, inp_f0) pitch, pitchf = self.get_f0(
input_audio_path,
audio_pad,
p_len,
f0_up_key,
f0_method,
filter_radius,
inp_f0,
)
pitch = pitch[:p_len] pitch = pitch[:p_len]
pitchf = pitchf[:p_len] pitchf = pitchf[:p_len]
if self.device == "mps": if self.device == "mps":
@ -347,16 +374,17 @@ class VC(object):
)[self.t_pad_tgt : -self.t_pad_tgt] )[self.t_pad_tgt : -self.t_pad_tgt]
) )
audio_opt = np.concatenate(audio_opt) audio_opt = np.concatenate(audio_opt)
if(rms_mix_rate!=1): if rms_mix_rate != 1:
audio_opt=change_rms(audio,16000,audio_opt,tgt_sr,rms_mix_rate) audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate)
if(resample_sr>=16000 and tgt_sr!=resample_sr): if resample_sr >= 16000 and tgt_sr != resample_sr:
audio_opt = librosa.resample( audio_opt = librosa.resample(
audio_opt, orig_sr=tgt_sr, target_sr=resample_sr audio_opt, orig_sr=tgt_sr, target_sr=resample_sr
) )
audio_max=np.abs(audio_opt).max()/0.99 audio_max = np.abs(audio_opt).max() / 0.99
max_int16=32768 max_int16 = 32768
if(audio_max>1):max_int16/=audio_max if audio_max > 1:
audio_opt=(audio_opt * max_int16).astype(np.int16) max_int16 /= audio_max
audio_opt = (audio_opt * max_int16).astype(np.int16)
del pitch, pitchf, sid del pitch, pitchf, sid
if torch.cuda.is_available(): if torch.cuda.is_available():
torch.cuda.empty_cache() torch.cuda.empty_cache()