Merge pull request #1618 from CNChTu/main

add fcpe for realtime
This commit is contained in:
RVC-Boss 2023-12-15 00:20:04 +08:00 committed by GitHub
commit d269d14768
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 1324 additions and 1295 deletions

1759
gui_v1.py

File diff suppressed because it is too large Load Diff

View File

@ -42,6 +42,7 @@ onnxruntime; sys_platform == 'darwin'
onnxruntime-gpu; sys_platform != 'darwin' onnxruntime-gpu; sys_platform != 'darwin'
torchcrepe==0.0.20 torchcrepe==0.0.20
fastapi==0.88 fastapi==0.88
torchfcpe
ffmpy==0.3.1 ffmpy==0.3.1
python-dotenv>=1.0.0 python-dotenv>=1.0.0
av av

View File

@ -1,421 +1,438 @@
from io import BytesIO
import os
import pickle
import sys
import traceback
from infer.lib import jit
from infer.lib.jit.get_synthesizer import get_synthesizer
from time import time as ttime
import fairseq
import faiss
import numpy as np
import parselmouth
import pyworld
import scipy.signal as signal
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchcrepe
from infer.lib.infer_pack.models import (
    SynthesizerTrnMs256NSFsid,
    SynthesizerTrnMs256NSFsid_nono,
    SynthesizerTrnMs768NSFsid,
    SynthesizerTrnMs768NSFsid_nono,
)

# Put the current working directory on sys.path before importing ``configs``.
# The statement order below is intentional; do not regroup these imports.
now_dir = os.getcwd()
sys.path.append(now_dir)
from multiprocessing import Manager as M

from configs.config import Config

# config = Config()
# Module-level multiprocessing manager, created at import time.
# RVC.get_f0 calls ``mm.dict()`` to obtain the shared dict it sends on ``inp_q``.
mm = M()
def printt(strr, *args):
    """Print ``strr``, %-formatting it with ``args`` when any are given.

    Args:
        strr: message, or a %-style format string when ``args`` is non-empty.
        *args: values substituted into ``strr`` with the ``%`` operator.

    With no ``args`` the message is printed verbatim, so a literal ``%`` in it
    is not treated as a format directive.
    """
    if len(args) == 0:
        print(strr)
    else:
        print(strr % args)
# config.device = torch.device("cpu")  # force CPU (for testing)
# config.is_half = False  # force full precision on CPU (for testing)
class RVC:
    """Block-wise voice-conversion inference wrapper.

    An instance holds a HuBERT feature extractor (``self.model``), a
    synthesizer (``self.net_g``), an optional faiss index (``self.index`` /
    ``self.big_npy``) and lazily created f0 estimators (``self.model_rmvpe``,
    ``self.model_fcpe``).
    """

    def __init__(
        self,
        key,
        pth_path,
        index_path,
        index_rate,
        n_cpu,
        inp_q,
        opt_q,
        config: "Config",
        last_rvc=None,
    ) -> None:
        """Load, or reuse from ``last_rvc``, everything needed for inference.

        Args:
            key: pitch shift in semitones; stored as ``f0_up_key`` and applied
                to f0 as ``2 ** (key / 12)``.
            pth_path: path of the synthesizer checkpoint.
            index_path: path of the faiss index; read only if ``index_rate != 0``.
            index_rate: blend weight of index-retrieved features (0 disables).
            n_cpu: number of chunks the audio is split into on the queue-based
                harvest path of ``get_f0``.
            inp_q: queue onto which harvest work items are put.
            opt_q: queue from which harvest completion timestamps are read.
            config: runtime configuration; ``device``, ``is_half``, ``use_jit``
                and ``dml`` are read here.
            last_rvc: previous instance whose HuBERT model, synthesizer and f0
                models are reused where the settings still match.

        Any exception raised during setup is caught and its traceback printed,
        so a failed construction leaves a partially initialised object.
        """
        try:
            if config.dml:

                def forward_dml(ctx, x, scale):
                    ctx.scale = scale
                    res = x.clone().detach()
                    return res

                # Replace fairseq's GradMultiply.forward when running with DML.
                fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
            # global config
            self.config = config
            self.inp_q = inp_q
            self.opt_q = opt_q
            # device = "cpu"  # force CPU (for testing)
            self.device = config.device
            self.f0_up_key = key
            self.time_step = 160 / 16000 * 1000
            self.f0_min = 50
            self.f0_max = 1100
            self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
            self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
            self.sr = 16000
            self.window = 160
            self.n_cpu = n_cpu
            self.use_jit = self.config.use_jit
            self.is_half = config.is_half

            if index_rate != 0:
                self.index = faiss.read_index(index_path)
                self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
                printt("Index search enabled")
            self.pth_path: str = pth_path
            self.index_path = index_path
            self.index_rate = index_rate

            if last_rvc is None:
                models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
                    ["assets/hubert/hubert_base.pt"],
                    suffix="",
                )
                hubert_model = models[0]
                hubert_model = hubert_model.to(self.device)
                if self.is_half:
                    hubert_model = hubert_model.half()
                else:
                    hubert_model = hubert_model.float()
                hubert_model.eval()
                self.model = hubert_model
            else:
                # Reuse the already loaded HuBERT model.
                self.model = last_rvc.model

            self.net_g: nn.Module = None

            def set_default_model():
                """Load the eager synthesizer from ``self.pth_path``."""
                self.net_g, cpt = get_synthesizer(self.pth_path, self.device)
                self.tgt_sr = cpt["config"][-1]
                cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
                self.if_f0 = cpt.get("f0", 1)
                self.version = cpt.get("version", "v1")
                if self.is_half:
                    self.net_g = self.net_g.half()
                else:
                    self.net_g = self.net_g.float()

            def set_jit_model():
                """Load the TorchScript synthesizer, exporting it when needed."""
                # NOTE(review): str.rstrip removes a trailing *character set*,
                # not the ".pth" suffix, so names ending in '.', 'p', 't' or
                # 'h' are over-trimmed. Left unchanged because the export
                # helper presumably derives the same path - confirm before
                # changing either side.
                jit_pth_path = self.pth_path.rstrip(".pth")
                jit_pth_path += ".half.jit" if self.is_half else ".jit"
                reload = False
                if str(self.device) == "cuda":
                    self.device = torch.device("cuda:0")
                if os.path.exists(jit_pth_path):
                    cpt = jit.load(jit_pth_path)
                    model_device = cpt["device"]
                    # A cached export made for another device is re-exported.
                    if model_device != str(self.device):
                        reload = True
                else:
                    reload = True

                if reload:
                    cpt = jit.synthesizer_jit_export(
                        self.pth_path,
                        "script",
                        None,
                        device=self.device,
                        is_half=self.is_half,
                    )

                self.tgt_sr = cpt["config"][-1]
                self.if_f0 = cpt.get("f0", 1)
                self.version = cpt.get("version", "v1")
                self.net_g = torch.jit.load(
                    BytesIO(cpt["model"]), map_location=self.device
                )
                # Expose the scripted forward under the name infer() uses.
                self.net_g.infer = self.net_g.forward
                self.net_g.eval().to(self.device)

            def set_synthesizer():
                """Choose between the JIT and the default synthesizer."""
                if self.use_jit and not config.dml:
                    if self.is_half and "cpu" in str(self.device):
                        printt(
                            "Use default Synthesizer model. "
                            "Jit is not supported on the CPU for half floating point"
                        )
                        set_default_model()
                    else:
                        set_jit_model()
                else:
                    set_default_model()

            if last_rvc is None or last_rvc.pth_path != self.pth_path:
                set_synthesizer()
            else:
                # Same checkpoint as before: copy its metadata and reuse the
                # synthesizer unless the JIT setting changed.
                self.tgt_sr = last_rvc.tgt_sr
                self.if_f0 = last_rvc.if_f0
                self.version = last_rvc.version
                self.is_half = last_rvc.is_half
                if last_rvc.use_jit != self.use_jit:
                    set_synthesizer()
                else:
                    self.net_g = last_rvc.net_g

            # Carry over f0 models the previous instance already loaded.
            if last_rvc is not None and hasattr(last_rvc, "model_rmvpe"):
                self.model_rmvpe = last_rvc.model_rmvpe
            if last_rvc is not None and hasattr(last_rvc, "model_fcpe"):
                self.model_fcpe = last_rvc.model_fcpe
        except Exception:
            # Best effort: report the failure instead of raising.
            printt(traceback.format_exc())

    def change_key(self, new_key):
        """Set the pitch shift (in semitones) used by later f0 extraction."""
        self.f0_up_key = new_key

    def change_index_rate(self, new_index_rate):
        """Update the index blend weight, loading the index on first enable.

        The faiss index is read from ``self.index_path`` only when the rate
        goes from 0 to a non-zero value.
        """
        if new_index_rate != 0 and self.index_rate == 0:
            self.index = faiss.read_index(self.index_path)
            self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
            printt("Index search enabled")
        self.index_rate = new_index_rate

    def get_f0_post(self, f0):
        """Quantise an f0 contour into coarse bins.

        Args:
            f0: numpy array of f0 values; entries whose mel value is not
                positive (e.g. 0) are mapped to bin 1.

        Returns:
            ``(f0_coarse, f0bak)``: int32 bins clipped to ``[1, 255]`` on a mel
            scale between ``self.f0_min`` and ``self.f0_max``, and a copy of
            the input contour.
        """
        f0_min = self.f0_min
        f0_max = self.f0_max
        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
        f0_mel_max = 1127 * np.log(1 + f0_max / 700)
        f0bak = f0.copy()
        f0_mel = 1127 * np.log(1 + f0 / 700)
        f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
            f0_mel_max - f0_mel_min
        ) + 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > 255] = 255
        f0_coarse = np.rint(f0_mel).astype(np.int32)
        return f0_coarse, f0bak

    def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
        """Extract f0 from ``x`` with the requested method.

        Args:
            x: numpy audio array; the code treats it as 16 kHz with a hop of
                160 samples.
            f0_up_key: pitch shift in semitones applied to the result.
            n_cpu: number of chunks for the queue-based harvest path; 1 runs
                harvest in-process.
            method: ``"crepe"``, ``"rmvpe"``, ``"fcpe"`` or ``"pm"``; any other
                value uses harvest.

        Returns:
            The ``(f0_coarse, f0)`` pair produced by ``get_f0_post``.
        """
        n_cpu = int(n_cpu)
        if method == "crepe":
            return self.get_f0_crepe(x, f0_up_key)
        if method == "rmvpe":
            return self.get_f0_rmvpe(x, f0_up_key)
        if method == "fcpe":
            return self.get_f0_fcpe(x, f0_up_key)
        if method == "pm":
            p_len = x.shape[0] // 160 + 1
            f0_min = 65
            l_pad = int(np.ceil(1.5 / f0_min * 16000))
            r_pad = l_pad + 1
            s = parselmouth.Sound(np.pad(x, (l_pad, r_pad)), 16000).to_pitch_ac(
                time_step=0.01,
                voicing_threshold=0.6,
                pitch_floor=f0_min,
                pitch_ceiling=1100,
            )
            assert np.abs(s.t1 - 1.5 / f0_min) < 0.001
            f0 = s.selected_array["frequency"]
            # Zero-pad, then trim, so exactly p_len frames are returned.
            if len(f0) < p_len:
                f0 = np.pad(f0, (0, p_len - len(f0)))
            f0 = f0[:p_len]
            f0 *= pow(2, f0_up_key / 12)
            return self.get_f0_post(f0)
        if n_cpu == 1:
            f0, t = pyworld.harvest(
                x.astype(np.double),
                fs=16000,
                f0_ceil=1100,
                f0_floor=50,
                frame_period=10,
            )
            f0 = signal.medfilt(f0, 3)
            f0 *= pow(2, f0_up_key / 12)
            return self.get_f0_post(f0)
        # Queue-based harvest: split the audio into chunks that overlap by 320
        # samples, put them on inp_q and stitch the per-chunk results.
        f0bak = np.zeros(x.shape[0] // 160 + 1, dtype=np.float64)
        length = len(x)
        part_length = 160 * ((length // 160 - 1) // n_cpu + 1)
        n_cpu = (length // 160 - 1) // (part_length // 160) + 1
        ts = ttime()
        res_f0 = mm.dict()
        for idx in range(n_cpu):
            tail = part_length * (idx + 1) + 320
            if idx == 0:
                self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts))
            else:
                self.inp_q.put(
                    (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts)
                )
        # Wait until the timestamp identifying this request comes back.
        while True:
            res_ts = self.opt_q.get()
            if res_ts == ts:
                break
        f0s = [i[1] for i in sorted(res_f0.items(), key=lambda item: item[0])]
        for idx, f0 in enumerate(f0s):
            # Drop the frames that come from the overlapped margins.
            if idx == 0:
                f0 = f0[:-3]
            elif idx != n_cpu - 1:
                f0 = f0[2:-3]
            else:
                f0 = f0[2:]
            f0bak[
                part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]
            ] = f0
        f0bak = signal.medfilt(f0bak, 3)
        f0bak *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0bak)

    def get_f0_crepe(self, x, f0_up_key):
        """Extract f0 with torchcrepe; on a DML device fall back to ``pm``."""
        # DML is not supported by crepe and the CPU is too slow to be usable,
        # so "pm" is used as a substitute.
        if "privateuseone" in str(self.device):
            return self.get_f0(x, f0_up_key, 1, "pm")
        audio = torch.tensor(np.copy(x))[None].float()
        # printt("using crepe,device:%s"%self.device)
        f0, pd = torchcrepe.predict(
            audio,
            self.sr,
            160,
            self.f0_min,
            self.f0_max,
            "full",
            batch_size=512,
            # device=self.device if self.device.type!="privateuseone" else "cpu",
            # crepe always runs in full precision, so half precision is not a
            # concern; CPU latency is too high to be usable.
            device=self.device,
            return_periodicity=True,
        )
        pd = torchcrepe.filter.median(pd, 3)
        f0 = torchcrepe.filter.mean(f0, 3)
        # Zero out frames whose periodicity is below 0.1.
        f0[pd < 0.1] = 0
        f0 = f0[0].cpu().numpy()
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    def get_f0_rmvpe(self, x, f0_up_key):
        """Extract f0 with RMVPE, creating the model on first use."""
        if not hasattr(self, "model_rmvpe"):
            from infer.lib.rmvpe import RMVPE

            printt("Loading rmvpe model")
            self.model_rmvpe = RMVPE(
                # "rmvpe.pt", is_half=self.is_half if self.device.type!="privateuseone" else False, device=self.device if self.device.type!="privateuseone"else "cpu"  # force rmvpe onto the CPU under DML
                # "rmvpe.pt", is_half=False, device=self.device  # DML configuration
                # "rmvpe.pt", is_half=False, device="cpu"  # CPU-locked configuration
                "assets/rmvpe/rmvpe.pt",
                is_half=self.is_half,
                device=self.device,  # normal path
                use_jit=self.config.use_jit,
            )
            # self.model_rmvpe = RMVPE("aug2_58000_half.pt", is_half=self.is_half, device=self.device)
        f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    def get_f0_fcpe(self, x, f0_up_key):
        """Extract f0 with torchfcpe, creating the bundled model on first use."""
        if not hasattr(self, "model_fcpe"):
            from torchfcpe import spawn_bundled_infer_model

            printt("Loading fcpe model")
            self.model_fcpe = spawn_bundled_infer_model(self.device)
        f0 = (
            self.model_fcpe.infer(
                torch.from_numpy(x).to(self.device).unsqueeze(0).float(),
                sr=16000,
                decoder_mode="local_argmax",
                threshold=0.006,
            )
            .squeeze()
            .cpu()
            .numpy()
        )
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    def infer(
        self,
        feats: torch.Tensor,
        indata: np.ndarray,
        block_frame_16k,
        rate,
        cache_pitch,
        cache_pitchf,
        f0method,
    ) -> np.ndarray:
        """Convert one audio block.

        Args:
            feats: input tensor; flattened to ``(1, -1)`` and passed to the
                HuBERT model as ``source``.
            indata: numpy audio handed to ``get_f0`` when the model uses f0.
            block_frame_16k: block length in samples; divided by 160 to get
                the number of pitch frames to shift out of the caches.
            rate: fraction of the feature frames (taken from the tail) that is
                blended with index results; also passed to ``net_g.infer``.
            cache_pitch: rolling coarse-pitch buffer, updated in place.
            cache_pitchf: rolling f0 buffer, updated in place.
            f0method: method name forwarded to ``get_f0``.

        Returns:
            The synthesizer output ``[0][0, 0]`` as a float tensor.
            NOTE(review): the annotation says ``np.ndarray`` but the value is
            the result of ``.data.float()``; confirm what callers expect.
        """
        feats = feats.view(1, -1)
        if self.config.is_half:
            feats = feats.half()
        else:
            feats = feats.float()
        feats = feats.to(self.device)
        t1 = ttime()
        with torch.no_grad():
            padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
            inputs = {
                "source": feats,
                "padding_mask": padding_mask,
                "output_layer": 9 if self.version == "v1" else 12,
            }
            logits = self.model.extract_features(**inputs)
            feats = (
                self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
            )
            # Repeat the last frame once.
            feats = torch.cat((feats, feats[:, -1:, :]), 1)
        t2 = ttime()
        try:
            if hasattr(self, "index") and self.index_rate != 0:
                leng_replace_head = int(rate * feats[0].shape[0])
                npy = feats[0][-leng_replace_head:].cpu().numpy().astype("float32")
                score, ix = self.index.search(npy, k=8)
                # Weight the 8 neighbours by the inverse square of their score.
                weight = np.square(1 / score)
                weight /= weight.sum(axis=1, keepdims=True)
                npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
                if self.config.is_half:
                    npy = npy.astype("float16")
                feats[0][-leng_replace_head:] = (
                    torch.from_numpy(npy).unsqueeze(0).to(self.device) * self.index_rate
                    + (1 - self.index_rate) * feats[0][-leng_replace_head:]
                )
            else:
                printt("Index search FAILED or disabled")
        except Exception:
            # Best effort: conversion continues with the unblended features.
            traceback.print_exc()
            printt("Index search FAILED")
        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
        t3 = ttime()
        if self.if_f0 == 1:
            pitch, pitchf = self.get_f0(indata, self.f0_up_key, self.n_cpu, f0method)
            # Shift the caches left by one block and append the new frames.
            start_frame = block_frame_16k // 160
            end_frame = len(cache_pitch) - (pitch.shape[0] - 4) + start_frame
            cache_pitch[:] = np.append(cache_pitch[start_frame:end_frame], pitch[3:-1])
            cache_pitchf[:] = np.append(
                cache_pitchf[start_frame:end_frame], pitchf[3:-1]
            )
            p_len = min(feats.shape[1], 13000, cache_pitch.shape[0])
        else:
            cache_pitch, cache_pitchf = None, None
            p_len = min(feats.shape[1], 13000)
        t4 = ttime()
        feats = feats[:, :p_len, :]
        if self.if_f0 == 1:
            cache_pitch = cache_pitch[:p_len]
            cache_pitchf = cache_pitchf[:p_len]
            cache_pitch = torch.LongTensor(cache_pitch).unsqueeze(0).to(self.device)
            cache_pitchf = torch.FloatTensor(cache_pitchf).unsqueeze(0).to(self.device)
        p_len = torch.LongTensor([p_len]).to(self.device)
        ii = 0  # sid
        sid = torch.LongTensor([ii]).to(self.device)
        with torch.no_grad():
            if self.if_f0 == 1:
                # printt(12222222222,feats.device,p_len.device,cache_pitch.device,cache_pitchf.device,sid.device,rate2)
                infered_audio = self.net_g.infer(
                    feats,
                    p_len,
                    cache_pitch,
                    cache_pitchf,
                    sid,
                    torch.FloatTensor([rate]),
                )[0][0, 0].data.float()
            else:
                infered_audio = self.net_g.infer(
                    feats, p_len, sid, torch.FloatTensor([rate])
                )[0][0, 0].data.float()
        t5 = ttime()
        printt(
            "Spent time: fea = %.2fs, index = %.2fs, f0 = %.2fs, model = %.2fs",
            t2 - t1,
            t3 - t2,
            t4 - t3,
            t5 - t4,
        )
        return infered_audio