feat: normalize audio

2025-05-06 20:01:37 +08:00 · 2024-06-04 22:29:39 +09:00 · 2024-06-04 22:29:39 +09:00 · b3215d126c
commit b3215d126c
parent c5d06bd2ec
4 changed files with 7 additions and 10 deletions
--- a/infer/modules/vc/hash.py
+++ b/infer/modules/vc/hash.py
@@ -47,6 +47,11 @@ def original_audio_time_minus():
 def original_audio_freq_minus():
    return original_audio_storage()["f"]

+@singleton_variable
+def original_rmvpe_f0():
+    x = original_audio_storage()
+    return x["pitch"], x["pitchf"]
+

 def _cut_u16(n):
    if n > 16384:
@@ -88,9 +93,6 @@ def wave_hash(time_field):
 def model_hash(config, tgt_sr, net_g, if_f0, version):
    pipeline = Pipeline(tgt_sr, config)
    audio = original_audio()
-    audio_max = np.abs(audio).max() / 0.95
-    if audio_max > 1:
-        np.divide(audio, audio_max, audio)
    hbt = load_hubert(config.device, config.is_half)
    audio_opt = pipeline.pipeline(
        hbt,
@@ -99,7 +101,7 @@ def model_hash(config, tgt_sr, net_g, if_f0, version):
        audio,
        [0, 0, 0],
        6,
-        "rmvpe",
+        original_rmvpe_f0(),
        "",
        0,
        2 if if_f0 else 0,
--- a/infer/modules/vc/lgdsng.npz
+++ b/infer/modules/vc/lgdsng.npz
--- a/infer/modules/vc/lgdsng_f0.npz
+++ b/infer/modules/vc/lgdsng_f0.npz
--- a/infer/modules/vc/pipeline.py
+++ b/infer/modules/vc/pipeline.py
@@ -16,7 +16,6 @@ import pyworld
 import torch
 import torch.nn.functional as F
 import torchcrepe
-import pathlib
 from scipy import signal

 now_dir = os.getcwd()
@@ -385,12 +384,8 @@ class Pipeline(object):
                    filter_radius,
                    inp_f0,
                )
-                """
-                np.savez_compressed(pathlib.Path(__file__).parent / "lgdsng_f0.npz", pitch=pitch, pitchf=pitchf)
-                """
            elif if_f0 == 2:
-                pitchz = np.load(pathlib.Path(__file__).parent / "lgdsng_f0.npz")
-                pitch, pitchf = pitchz["pitch"], pitchz["pitchf"]
+                pitch, pitchf = f0_method
            pitch = pitch[:p_len]
            pitchf = pitchf[:p_len]
            if "mps" not in str(self.device) or "xpu" not in str(self.device):