diff --git a/infer/lib/audio.py b/infer/lib/audio.py
index 56acbdc..13c12f5 100644
--- a/infer/lib/audio.py
+++ b/infer/lib/audio.py
@@ -1,3 +1,6 @@
+import os
+import traceback
+
 import librosa
 import numpy as np
 import av
@@ -47,10 +50,14 @@ def audio2(i, o, format, sr):
 
 
 def load_audio(file, sr):
+    file = (
+        file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+    )  # Guard against pasted paths that carry stray leading/trailing spaces, double quotes, or newlines
+    if os.path.exists(file) == False:
+        raise RuntimeError(
+            "You input a wrong audio path that does not exists, please fix it!"
+        )
     try:
-        file = (
-            file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        )  # 防止小白拷路径头尾带了空格和"和回车
         with open(file, "rb") as f:
             with BytesIO() as out:
                 audio2(f, out, "f32le", sr)
@@ -62,5 +69,5 @@ def load_audio(file, sr):
             audio = np.mean(audio, -1)
         return librosa.resample(audio, orig_sr=file[0], target_sr=16000)
 
-    except Exception as e:
-        raise RuntimeError(f"Failed to load audio: {e}")
+    except:
+        raise RuntimeError(traceback.format_exc())
diff --git a/infer/modules/uvr5/mdxnet.py b/infer/modules/uvr5/mdxnet.py
index 86a0668..2f246db 100644
--- a/infer/modules/uvr5/mdxnet.py
+++ b/infer/modules/uvr5/mdxnet.py
@@ -216,16 +216,26 @@ class Predictor:
             path_other = "%s/%s_others.wav" % (others_root, basename)
             sf.write(path_vocal, mix - opt, rate)
             sf.write(path_other, opt, rate)
+            opt_path_vocal = path_vocal[:-4] + ".%s" % format
+            opt_path_other = path_other[:-4] + ".%s" % format
             if os.path.exists(path_vocal):
                 os.system(
-                    "ffmpeg -i %s -vn %s -q:a 2 -y"
-                    % (path_vocal, path_vocal[:-4] + ".%s" % format)
+                    "ffmpeg -i %s -vn %s -q:a 2 -y" % (path_vocal, opt_path_vocal)
                 )
+                if os.path.exists(opt_path_vocal):
+                    try:
+                        os.remove(path_vocal)
+                    except:
+                        pass
             if os.path.exists(path_other):
                 os.system(
-                    "ffmpeg -i %s -vn %s -q:a 2 -y"
-                    % (path_other, path_other[:-4] + ".%s" % format)
+                    "ffmpeg -i %s -vn %s -q:a 2 -y" % (path_other, opt_path_other)
                 )
+                if os.path.exists(opt_path_other):
+                    try:
+                        os.remove(path_other)
+                    except:
+                        pass
 
 
 class MDXNetDereverb:
@@ -242,5 +252,5 @@ class MDXNetDereverb:
         self.pred = Predictor(self)
         self.device = device
 
-    def path_audio(self, input, vocal_root, others_root, format):
+    def _path_audio_(self, input, vocal_root, others_root, format, is_hp3=False):
         self.pred.prediction(input, vocal_root, others_root, format)
diff --git a/infer/modules/uvr5/modules.py b/infer/modules/uvr5/modules.py
index f63ac6a..bce3cef 100644
--- a/infer/modules/uvr5/modules.py
+++ b/infer/modules/uvr5/modules.py
@@ -9,7 +9,7 @@ import torch
 
 from configs.config import Config
 from infer.modules.uvr5.mdxnet import MDXNetDereverb
-from infer.modules.uvr5.preprocess import AudioPre, AudioPreDeEcho
+from infer.modules.uvr5.vr import AudioPre, AudioPreDeEcho
 
 config = Config()
 
@@ -36,6 +36,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
                 device=config.device,
                 is_half=config.is_half,
             )
+        is_hp3 = "HP3" in model_name
         if inp_root != "":
             paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
         else:
@@ -52,7 +53,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
                 ):
                     need_reformat = 0
                     pre_fun._path_audio_(
-                        inp_path, save_root_ins, save_root_vocal, format0
+                        inp_path, save_root_ins, save_root_vocal, format0, is_hp3=is_hp3
                     )
                     done = 1
             except:
@@ -70,7 +71,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
                 inp_path = tmp_path
             try:
                 if done == 0:
-                    pre_fun.path_audio(
+                    pre_fun._path_audio_(
                         inp_path, save_root_ins, save_root_vocal, format0
                     )
                 infos.append("%s->Success" % (os.path.basename(inp_path)))
diff --git a/infer/modules/uvr5/preprocess.py b/infer/modules/uvr5/vr.py
similarity index 85%
rename from infer/modules/uvr5/preprocess.py
rename to infer/modules/uvr5/vr.py
index c22b291..d3fbac4 100644
--- a/infer/modules/uvr5/preprocess.py
+++ b/infer/modules/uvr5/vr.py
@@ -41,7 +41,9 @@ class AudioPre:
         self.mp = mp
         self.model = model
 
-    def _path_audio_(self, music_file, ins_root=None, vocal_root=None, format="flac"):
+    def _path_audio_(
+        self, music_file, ins_root=None, vocal_root=None, format="flac", is_hp3=False
+    ):
         if ins_root is None and vocal_root is None:
             return "No save root."
         name = os.path.basename(music_file)
@@ -120,18 +122,22 @@ class AudioPre:
             else:
                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
             logger.info("%s instruments done" % name)
+            if is_hp3 == True:
+                head = "vocal_"
+            else:
+                head = "instrument_"
             if format in ["wav", "flac"]:
                 sf.write(
                     os.path.join(
                         ins_root,
-                        "instrument_{}_{}.{}".format(name, self.data["agg"], format),
+                        head + "{}_{}.{}".format(name, self.data["agg"], format),
                     ),
                     (np.array(wav_instrument) * 32768).astype("int16"),
                     self.mp.param["sr"],
                 )  #
             else:
                 path = os.path.join(
-                    ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"])
+                    ins_root, head + "{}_{}.wav".format(name, self.data["agg"])
                 )
                 sf.write(
                     path,
@@ -139,11 +145,18 @@ class AudioPre:
                     self.mp.param["sr"],
                 )
                 if os.path.exists(path):
-                    os.system(
-                        "ffmpeg -i %s -vn %s -q:a 2 -y"
-                        % (path, path[:-4] + ".%s" % format)
-                    )
+                    opt_format_path = path[:-4] + ".%s" % format
+                    os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
+                    if os.path.exists(opt_format_path):
+                        try:
+                            os.remove(path)
+                        except:
+                            pass
         if vocal_root is not None:
+            if is_hp3 == True:
+                head = "instrument_"
+            else:
+                head = "vocal_"
             if self.data["high_end_process"].startswith("mirroring"):
                 input_high_end_ = spec_utils.mirroring(
                     self.data["high_end_process"], v_spec_m, input_high_end, self.mp
@@ -158,14 +171,14 @@ class AudioPre:
                 sf.write(
                     os.path.join(
                         vocal_root,
-                        "vocal_{}_{}.{}".format(name, self.data["agg"], format),
+                        head + "{}_{}.{}".format(name, self.data["agg"], format),
                     ),
                     (np.array(wav_vocals) * 32768).astype("int16"),
                     self.mp.param["sr"],
                 )
             else:
                 path = os.path.join(
-                    vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"])
+                    vocal_root, head + "{}_{}.wav".format(name, self.data["agg"])
                 )
                 sf.write(
                     path,
@@ -173,10 +186,13 @@ class AudioPre:
                     self.mp.param["sr"],
                 )
                 if os.path.exists(path):
-                    os.system(
-                        "ffmpeg -i %s -vn %s -q:a 2 -y"
-                        % (path, path[:-4] + ".%s" % format)
-                    )
+                    opt_format_path = path[:-4] + ".%s" % format
+                    os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
+                    if os.path.exists(opt_format_path):
+                        try:
+                            os.remove(path)
+                        except:
+                            pass
 
 
 class AudioPreDeEcho:
@@ -207,7 +223,7 @@ class AudioPreDeEcho:
         self.model = model
 
     def _path_audio_(
-        self, music_file, vocal_root=None, ins_root=None, format="flac"
+        self, music_file, vocal_root=None, ins_root=None, format="flac", is_hp3=False
     ):  # 3个VR模型vocal和ins是反的
         if ins_root is None and vocal_root is None:
             return "No save root."
@@ -306,10 +322,13 @@ class AudioPreDeEcho:
                     self.mp.param["sr"],
                 )
                 if os.path.exists(path):
-                    os.system(
-                        "ffmpeg -i %s -vn %s -q:a 2 -y"
-                        % (path, path[:-4] + ".%s" % format)
-                    )
+                    opt_format_path = path[:-4] + ".%s" % format
+                    os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
+                    if os.path.exists(opt_format_path):
+                        try:
+                            os.remove(path)
+                        except:
+                            pass
         if vocal_root is not None:
             if self.data["high_end_process"].startswith("mirroring"):
                 input_high_end_ = spec_utils.mirroring(
@@ -340,7 +359,10 @@ class AudioPreDeEcho:
                     self.mp.param["sr"],
                 )
                 if os.path.exists(path):
-                    os.system(
-                        "ffmpeg -i %s -vn %s -q:a 2 -y"
-                        % (path, path[:-4] + ".%s" % format)
-                    )
+                    opt_format_path = path[:-4] + ".%s" % format
+                    os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
+                    if os.path.exists(opt_format_path):
+                        try:
+                            os.remove(path)
+                        except:
+                            pass
diff --git a/pyproject.toml b/pyproject.toml
index fd67580..6a91a9c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ uvicorn = "^0.21.1"
 colorama = "^0.4.6"
 torchcrepe = "0.0.20"
 python-dotenv = "^1.0.0"
+av = "^10.0.0"
 
 [tool.poetry.dev-dependencies]