Retrieval-based-Voice-Conve.../uvr5_pack/utils.py

121 lines
3.5 KiB
Python
Raw Normal View History

2023-03-31 17:47:00 +08:00
import torch
import numpy as np
from tqdm import tqdm
import json
def load_data(file_name: str = "./uvr5_pack/name_params.json") -> dict:
with open(file_name, "r") as f:
data = json.load(f)
return data
2023-03-31 17:47:00 +08:00
def make_padding(width, cropsize, offset):
left = offset
roi_size = cropsize - left * 2
if roi_size == 0:
roi_size = cropsize
right = roi_size - (width % roi_size) + left
return left, right, roi_size
def inference(X_spec, device, model, aggressiveness, data):
"""
2023-03-31 17:47:00 +08:00
data dic configs
"""
def _execute(
X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half=True
):
2023-03-31 17:47:00 +08:00
model.eval()
with torch.no_grad():
preds = []
2023-03-31 17:47:00 +08:00
iterations = [n_window]
total_iterations = sum(iterations)
for i in tqdm(range(n_window)):
2023-03-31 17:47:00 +08:00
start = i * roi_size
X_mag_window = X_mag_pad[
None, :, :, start : start + data["window_size"]
]
2023-03-31 17:47:00 +08:00
X_mag_window = torch.from_numpy(X_mag_window)
if is_half:
X_mag_window = X_mag_window.half()
X_mag_window = X_mag_window.to(device)
2023-03-31 17:47:00 +08:00
pred = model.predict(X_mag_window, aggressiveness)
pred = pred.detach().cpu().numpy()
preds.append(pred[0])
2023-03-31 17:47:00 +08:00
pred = np.concatenate(preds, axis=2)
return pred
2023-03-31 17:47:00 +08:00
def preprocess(X_spec):
X_mag = np.abs(X_spec)
X_phase = np.angle(X_spec)
return X_mag, X_phase
2023-03-31 17:47:00 +08:00
X_mag, X_phase = preprocess(X_spec)
coef = X_mag.max()
X_mag_pre = X_mag / coef
n_frame = X_mag_pre.shape[2]
pad_l, pad_r, roi_size = make_padding(n_frame, data["window_size"], model.offset)
2023-03-31 17:47:00 +08:00
n_window = int(np.ceil(n_frame / roi_size))
X_mag_pad = np.pad(X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
2023-03-31 17:47:00 +08:00
if list(model.state_dict().values())[0].dtype == torch.float16:
is_half = True
else:
is_half = False
pred = _execute(
X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half
)
2023-03-31 17:47:00 +08:00
pred = pred[:, :, :n_frame]
if data["tta"]:
2023-03-31 17:47:00 +08:00
pad_l += roi_size // 2
pad_r += roi_size // 2
n_window += 1
X_mag_pad = np.pad(X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
2023-03-31 17:47:00 +08:00
pred_tta = _execute(
X_mag_pad, roi_size, n_window, device, model, aggressiveness, is_half
)
pred_tta = pred_tta[:, :, roi_size // 2 :]
2023-03-31 17:47:00 +08:00
pred_tta = pred_tta[:, :, :n_frame]
return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.0j * X_phase)
2023-03-31 17:47:00 +08:00
else:
return pred * coef, X_mag, np.exp(1.0j * X_phase)
2023-03-31 17:47:00 +08:00
def _get_name_params(model_path, model_hash):
data = load_data()
flag = False
2023-03-31 17:47:00 +08:00
ModelName = model_path
for type in list(data):
for model in list(data[type][0]):
for i in range(len(data[type][0][model])):
if str(data[type][0][model][i]["hash_name"]) == model_hash:
flag = True
elif str(data[type][0][model][i]["hash_name"]) in ModelName:
flag = True
if flag:
model_params_auto = data[type][0][model][i]["model_params"]
param_name_auto = data[type][0][model][i]["param_name"]
if type == "equivalent":
return param_name_auto, model_params_auto
else:
flag = False
return param_name_auto, model_params_auto