feat: normalize audio

This commit is contained in:
源文雨 2024-06-04 22:29:39 +09:00
parent c5d06bd2ec
commit b3215d126c
4 changed files with 7 additions and 10 deletions

View File

@@ -47,6 +47,11 @@ def original_audio_time_minus():
def original_audio_freq_minus():
return original_audio_storage()["f"]
@singleton_variable
def original_rmvpe_f0():
    """Return the stored pitch data of the original audio.

    Reads the mapping produced by ``original_audio_storage()`` and returns
    its ``"pitch"`` and ``"pitchf"`` entries as a ``(pitch, pitchf)`` tuple.

    NOTE(review): presumably ``singleton_variable`` evaluates this once and
    caches the result, and the two entries are precomputed RMVPE f0 curves
    (going by the function name) -- confirm against the decorator and the
    stored data.
    """
    storage = original_audio_storage()
    return storage["pitch"], storage["pitchf"]
def _cut_u16(n):
if n > 16384:
@@ -88,9 +93,6 @@ def wave_hash(time_field):
def model_hash(config, tgt_sr, net_g, if_f0, version):
pipeline = Pipeline(tgt_sr, config)
audio = original_audio()
audio_max = np.abs(audio).max() / 0.95
if audio_max > 1:
np.divide(audio, audio_max, audio)
hbt = load_hubert(config.device, config.is_half)
audio_opt = pipeline.pipeline(
hbt,
@@ -99,7 +101,7 @@ def model_hash(config, tgt_sr, net_g, if_f0, version):
audio,
[0, 0, 0],
6,
"rmvpe",
original_rmvpe_f0(),
"",
0,
2 if if_f0 else 0,

Binary file not shown.

Binary file not shown.

View File

@@ -16,7 +16,6 @@ import pyworld
import torch
import torch.nn.functional as F
import torchcrepe
import pathlib
from scipy import signal
now_dir = os.getcwd()
@@ -385,12 +384,8 @@ class Pipeline(object):
filter_radius,
inp_f0,
)
"""
np.savez_compressed(pathlib.Path(__file__).parent / "lgdsng_f0.npz", pitch=pitch, pitchf=pitchf)
"""
elif if_f0 == 2:
pitchz = np.load(pathlib.Path(__file__).parent / "lgdsng_f0.npz")
pitch, pitchf = pitchz["pitch"], pitchz["pitchf"]
pitch, pitchf = f0_method
pitch = pitch[:p_len]
pitchf = pitchf[:p_len]
if "mps" not in str(self.device) or "xpu" not in str(self.device):