Update audio.py

2025-04-26 14:38:58 +08:00 · 2024-01-23 15:15:11 +08:00 · 2024-01-23 15:15:11 +08:00 · 5dbdbab2c1
commit 5dbdbab2c1
parent 267192fd92
1 changed file with 19 additions and 41 deletions
--- a/infer/lib/audio.py
+++ b/infer/lib/audio.py
@@ -1,7 +1,5 @@
-import os
+import platform,os
-import traceback
+import ffmpeg
 import librosa
 import numpy as np
 import av
 from io import BytesIO
@@ -30,44 +28,24 @@ def wav2(i, o, format):
    inp.close()
 def audio2(i, o, format, sr):
    """Transcode the first audio stream of ``i`` into ``o`` as mono audio.

    Args:
        i: Input handed to ``av.open`` for reading (callers in this file pass
            an open binary file object).
        o: Output handed to ``av.open`` for writing (callers in this file pass
            a ``BytesIO``).
        format: Container format name for the output. ``"ogg"`` is encoded
            with the ``libvorbis`` codec and ``"f32le"`` with ``pcm_f32le``;
            any other value is used as the codec name unchanged.
        sr: Sample rate assigned to the output stream.

    Both containers are closed even when decoding, encoding or muxing raises.
    """
    # Container format names that differ from the codec name used to encode.
    codec_by_format = {"ogg": "libvorbis", "f32le": "pcm_f32le"}

    inp = av.open(i, "rb")
    try:
        out = av.open(o, "wb", format=format)
        try:
            codec = codec_by_format.get(format, format)
            ostream = out.add_stream(codec, channels=1)
            ostream.sample_rate = sr

            for frame in inp.decode(audio=0):
                for p in ostream.encode(frame):
                    out.mux(p)

            # Flush the encoder: codecs that buffer frames (e.g. libvorbis)
            # only emit their final packets when encode() is called with None.
            for p in ostream.encode(None):
                out.mux(p)
        finally:
            out.close()
    finally:
        inp.close()
 def load_audio(file, sr):
    file = (
        file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
    )  # 防止小白拷路径头尾带了空格和"和回车
    if os.path.exists(file) == False:
        raise RuntimeError(
            "You input a wrong audio path that does not exists, please fix it!"
        )
    try:
-        with open(file, "rb") as f:
+        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
-            with BytesIO() as out:
+        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
-                audio2(f, out, "f32le", sr)
+        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
-                return np.frombuffer(out.getvalue(), np.float32).flatten()
+        file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}")
-    except AttributeError:
+    return np.frombuffer(out, np.float32).flatten()
        audio = file[1] / 32768.0
        if len(audio.shape) == 2:
            audio = np.mean(audio, -1)
        return librosa.resample(audio, orig_sr=file[0], target_sr=16000)
-    except:
+
-        raise RuntimeError(traceback.format_exc())
+def clean_path(path_str):
    if platform.system() == 'Windows':
        path_str = path_str.replace('/', '\\')
    return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ")