Update audio.py

2025-05-15 16:29:05 +08:00 · 2024-01-23 15:15:11 +08:00 · 2024-01-23 15:15:11 +08:00 · 5dbdbab2c1
commit 5dbdbab2c1
parent 267192fd92
1 changed file with 19 additions and 41 deletions
--- a/infer/lib/audio.py
+++ b/infer/lib/audio.py
@@ -1,7 +1,5 @@
-import os
-import traceback
-
-import librosa
+import platform,os
+import ffmpeg
 import numpy as np
 import av
 from io import BytesIO
@@ -30,44 +28,24 @@ def wav2(i, o, format):
    inp.close()


-def audio2(i, o, format, sr):
-    inp = av.open(i, "rb")
-    out = av.open(o, "wb", format=format)
-    if format == "ogg":
-        format = "libvorbis"
-    if format == "f32le":
-        format = "pcm_f32le"
-
-    ostream = out.add_stream(format, channels=1)
-    ostream.sample_rate = sr
-
-    for frame in inp.decode(audio=0):
-        for p in ostream.encode(frame):
-            out.mux(p)
-
-    out.close()
-    inp.close()
-
-
 def load_audio(file, sr):
-    file = (
-        file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-    )  # 防止小白拷路径头尾带了空格和"和回车
-    if os.path.exists(file) == False:
-        raise RuntimeError(
-            "You input a wrong audio path that does not exists, please fix it!"
-        )
    try:
-        with open(file, "rb") as f:
-            with BytesIO() as out:
-                audio2(f, out, "f32le", sr)
-                return np.frombuffer(out.getvalue(), np.float32).flatten()
+        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
+        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
+        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+        file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
+        out, _ = (
+            ffmpeg.input(file, threads=0)
+            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
+            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+        )
+    except Exception as e:
+        raise RuntimeError(f"Failed to load audio: {e}")

-    except AttributeError:
-        audio = file[1] / 32768.0
-        if len(audio.shape) == 2:
-            audio = np.mean(audio, -1)
-        return librosa.resample(audio, orig_sr=file[0], target_sr=16000)
+    return np.frombuffer(out, np.float32).flatten()

-    except:
-        raise RuntimeError(traceback.format_exc())
+
+def clean_path(path_str):
+    if platform.system() == 'Windows':
+        path_str = path_str.replace('/', '\\')
+    return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ")