diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb
deleted file mode 100644
index b38d8d2..0000000
--- a/Retrieval_based_Voice_Conversion_WebUI.ipynb
+++ /dev/null
@@ -1,403 +0,0 @@
-{
- "cells": [
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) Training notebook"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "ZFFCx5J80SGa"
-   },
-   "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "GmFP6bN9dvOq"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 查看显卡\n",
-    "!nvidia-smi"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "jwu07JgqoFON"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 挂载谷歌云盘\n",
-    "\n",
-    "from google.colab import drive\n",
-    "\n",
-    "drive.mount(\"/content/drive\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "wjddIFr1oS3W"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 安装依赖\n",
-    "!apt-get -y install build-essential python3-dev ffmpeg\n",
-    "!pip3 install --upgrade setuptools wheel\n",
-    "!pip3 install --upgrade pip\n",
-    "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "ge_97mfpgqTm"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 克隆仓库\n",
-    "\n",
-    "!git clone --depth=1 -b stable https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
-    "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
-    "!mkdir -p pretrained uvr5_weights"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "BLDEZADkvlw1"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 更新仓库（一般无需执行）\n",
-    "!git pull"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "pqE0PrnuRqI2"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 安装aria2\n",
-    "!apt -y install -qq aria2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "UG3XpUwEomUz"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 下载底模\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "HugjmZqZRuiF"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 下载人声分离模型\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "2RCaT9FTR0ej"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 下载hubert_base\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# @title #下载rmvpe模型\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o rmvpe.pt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "Mwk7Q0Loqzjx"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
-    "\n",
-    "# @markdown 数据集位置\n",
-    "DATASET = (\n",
-    "    \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\"  # @param {type:\"string\"}\n",
-    ")\n",
-    "\n",
-    "!mkdir -p /content/dataset\n",
-    "!unzip -d /content/dataset -B {DATASET}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "PDlFxWHWEynD"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 重命名数据集中的重名文件\n",
-    "!ls -a /content/dataset/\n",
-    "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "7vh6vphDwO0b"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 启动web\n",
-    "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
-    "# %load_ext tensorboard\n",
-    "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
-    "!python3 infer-web.py --colab --pycmd python3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "FgJuNeAwx5Y_"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 手动将训练后的模型文件备份到谷歌云盘\n",
-    "# @markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
-    "\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 模型epoch\n",
-    "MODELEPOCH = 9600  # @param {type:\"integer\"}\n",
-    "\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
-    "\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "OVQoLQJXS7WX"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 从谷歌云盘恢复pth\n",
-    "# @markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
-    "\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 模型epoch\n",
-    "MODELEPOCH = 7500  # @param {type:\"integer\"}\n",
-    "\n",
-    "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
-    "\n",
-    "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
-    "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
-    "!cp /content/drive/MyDrive/*.index /content/\n",
-    "!cp /content/drive/MyDrive/*.npy /content/\n",
-    "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "ZKAyuKb9J6dz"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 手动预处理（不推荐）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 采样率\n",
-    "BITRATE = 48000  # @param {type:\"integer\"}\n",
-    "# @markdown 使用的进程数\n",
-    "THREADCOUNT = 8  # @param {type:\"integer\"}\n",
-    "\n",
-    "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "CrxJqzAUKmPJ"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 手动提取特征（不推荐）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 使用的进程数\n",
-    "THREADCOUNT = 8  # @param {type:\"integer\"}\n",
-    "# @markdown 音高提取算法\n",
-    "ALGO = \"harvest\"  # @param {type:\"string\"}\n",
-    "\n",
-    "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n",
-    "\n",
-    "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "IMLPLKOaKj58"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 手动训练（不推荐）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 使用的GPU\n",
-    "USEGPU = \"0\"  # @param {type:\"string\"}\n",
-    "# @markdown 批大小\n",
-    "BATCHSIZE = 32  # @param {type:\"integer\"}\n",
-    "# @markdown 停止的epoch\n",
-    "MODELEPOCH = 3200  # @param {type:\"integer\"}\n",
-    "# @markdown 保存epoch间隔\n",
-    "EPOCHSAVE = 100  # @param {type:\"integer\"}\n",
-    "# @markdown 采样率\n",
-    "MODELSAMPLE = \"48k\"  # @param {type:\"string\"}\n",
-    "# @markdown 是否缓存训练集\n",
-    "CACHEDATA = 1  # @param {type:\"integer\"}\n",
-    "# @markdown 是否仅保存最新的ckpt文件\n",
-    "ONLYLATEST = 0  # @param {type:\"integer\"}\n",
-    "\n",
-    "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "haYA81hySuDl"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 删除其它pth，只留选中的（慎点，仔细看代码）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 选中模型epoch\n",
-    "MODELEPOCH = 9600  # @param {type:\"integer\"}\n",
-    "\n",
-    "!echo \"备份选中的模型。。。\"\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"正在删除。。。\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
-    "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n",
-    "\n",
-    "!echo \"恢复选中的模型。。。\"\n",
-    "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
-    "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"删除完成\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "QhSiPTVPoIRh"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 清除项目下所有文件，只留选中的模型（慎点，仔细看代码）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 选中模型epoch\n",
-    "MODELEPOCH = 9600  # @param {type:\"integer\"}\n",
-    "\n",
-    "!echo \"备份选中的模型。。。\"\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"正在删除。。。\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
-    "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n",
-    "\n",
-    "!echo \"恢复选中的模型。。。\"\n",
-    "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
-    "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"删除完成\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "private_outputs": true,
-   "provenance": []
-  },
-  "gpuClass": "standard",
-  "kernelspec": {
-   "display_name": "Python 3",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb b/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb
deleted file mode 100644
index 0cad19f..0000000
--- a/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb
+++ /dev/null
@@ -1,422 +0,0 @@
-{
- "cells": [
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) Training notebook"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "ZFFCx5J80SGa"
-   },
-   "source": [
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "GmFP6bN9dvOq"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #查看显卡\n",
-    "!nvidia-smi"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "jwu07JgqoFON"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 挂载谷歌云盘\n",
-    "\n",
-    "from google.colab import drive\n",
-    "\n",
-    "drive.mount(\"/content/drive\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "wjddIFr1oS3W"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #安装依赖\n",
-    "!apt-get -y install build-essential python3-dev ffmpeg\n",
-    "!pip3 install --upgrade setuptools wheel\n",
-    "!pip3 install --upgrade pip\n",
-    "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "ge_97mfpgqTm"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #克隆仓库\n",
-    "\n",
-    "!mkdir Retrieval-based-Voice-Conversion-WebUI\n",
-    "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
-    "!git init\n",
-    "!git remote add origin https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git\n",
-    "!git fetch origin cfd984812804ddc9247d65b14c82cd32e56c1133 --depth=1\n",
-    "!git reset --hard FETCH_HEAD"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "BLDEZADkvlw1"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #更新仓库（一般无需执行）\n",
-    "!git pull"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "pqE0PrnuRqI2"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #安装aria2\n",
-    "!apt -y install -qq aria2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "UG3XpUwEomUz"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 下载底模\n",
-    "\n",
-    "# v1\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
-    "\n",
-    "# v2\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D40k.pth\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D48k.pth\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G40k.pth\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G48k.pth\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D40k.pth\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D48k.pth\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G32k.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G40k.pth\n",
-    "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G48k.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "HugjmZqZRuiF"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #下载人声分离模型\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "2RCaT9FTR0ej"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #下载hubert_base\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# @title #下载rmvpe模型\n",
-    "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o rmvpe.pt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "Mwk7Q0Loqzjx"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #从谷歌云盘加载打包好的数据集到/content/dataset\n",
-    "\n",
-    "# @markdown 数据集位置\n",
-    "DATASET = (\n",
-    "    \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\"  # @param {type:\"string\"}\n",
-    ")\n",
-    "\n",
-    "!mkdir -p /content/dataset\n",
-    "!unzip -d /content/dataset -B {DATASET}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "PDlFxWHWEynD"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #重命名数据集中的重名文件\n",
-    "!ls -a /content/dataset/\n",
-    "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "7vh6vphDwO0b"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #启动webui\n",
-    "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
-    "# %load_ext tensorboard\n",
-    "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
-    "!python3 infer-web.py --colab --pycmd python3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "FgJuNeAwx5Y_"
-   },
-   "outputs": [],
-   "source": [
-    "# @title #手动将训练后的模型文件备份到谷歌云盘\n",
-    "# @markdown #需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
-    "\n",
-    "# @markdown #模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown #模型epoch\n",
-    "MODELEPOCH = 9600  # @param {type:\"integer\"}\n",
-    "\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
-    "\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "OVQoLQJXS7WX"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 从谷歌云盘恢复pth\n",
-    "# @markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
-    "\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 模型epoch\n",
-    "MODELEPOCH = 7500  # @param {type:\"integer\"}\n",
-    "\n",
-    "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
-    "\n",
-    "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
-    "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
-    "!cp /content/drive/MyDrive/*.index /content/\n",
-    "!cp /content/drive/MyDrive/*.npy /content/\n",
-    "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "ZKAyuKb9J6dz"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 手动预处理（不推荐）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 采样率\n",
-    "BITRATE = 48000  # @param {type:\"integer\"}\n",
-    "# @markdown 使用的进程数\n",
-    "THREADCOUNT = 8  # @param {type:\"integer\"}\n",
-    "\n",
-    "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "CrxJqzAUKmPJ"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 手动提取特征（不推荐）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 使用的进程数\n",
-    "THREADCOUNT = 8  # @param {type:\"integer\"}\n",
-    "# @markdown 音高提取算法\n",
-    "ALGO = \"harvest\"  # @param {type:\"string\"}\n",
-    "\n",
-    "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n",
-    "\n",
-    "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "IMLPLKOaKj58"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 手动训练（不推荐）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 使用的GPU\n",
-    "USEGPU = \"0\"  # @param {type:\"string\"}\n",
-    "# @markdown 批大小\n",
-    "BATCHSIZE = 32  # @param {type:\"integer\"}\n",
-    "# @markdown 停止的epoch\n",
-    "MODELEPOCH = 3200  # @param {type:\"integer\"}\n",
-    "# @markdown 保存epoch间隔\n",
-    "EPOCHSAVE = 100  # @param {type:\"integer\"}\n",
-    "# @markdown 采样率\n",
-    "MODELSAMPLE = \"48k\"  # @param {type:\"string\"}\n",
-    "# @markdown 是否缓存训练集\n",
-    "CACHEDATA = 1  # @param {type:\"integer\"}\n",
-    "# @markdown 是否仅保存最新的ckpt文件\n",
-    "ONLYLATEST = 0  # @param {type:\"integer\"}\n",
-    "\n",
-    "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "haYA81hySuDl"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 删除其它pth，只留选中的（慎点，仔细看代码）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 选中模型epoch\n",
-    "MODELEPOCH = 9600  # @param {type:\"integer\"}\n",
-    "\n",
-    "!echo \"备份选中的模型。。。\"\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"正在删除。。。\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
-    "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n",
-    "\n",
-    "!echo \"恢复选中的模型。。。\"\n",
-    "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
-    "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"删除完成\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "QhSiPTVPoIRh"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 清除项目下所有文件，只留选中的模型（慎点，仔细看代码）\n",
-    "# @markdown 模型名\n",
-    "MODELNAME = \"lulu\"  # @param {type:\"string\"}\n",
-    "# @markdown 选中模型epoch\n",
-    "MODELEPOCH = 9600  # @param {type:\"integer\"}\n",
-    "\n",
-    "!echo \"备份选中的模型。。。\"\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
-    "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"正在删除。。。\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
-    "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n",
-    "\n",
-    "!echo \"恢复选中的模型。。。\"\n",
-    "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
-    "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
-    "\n",
-    "!echo \"删除完成\"\n",
-    "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "private_outputs": true,
-   "provenance": []
-  },
-  "gpuClass": "standard",
-  "kernelspec": {
-   "display_name": "Python 3",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/api_231006.py b/api_231006.py
deleted file mode 100644
index 56e26e2..0000000
--- a/api_231006.py
+++ /dev/null
@@ -1,440 +0,0 @@
-#api for 231006 release version by Xiaokai
-import os
-import sys
-import json
-import re
-import time
-import librosa
-import torch
-import numpy as np
-import torch.nn.functional as F
-import torchaudio.transforms as tat
-import sounddevice as sd
-from dotenv import load_dotenv
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-import threading
-import uvicorn
-import logging
-
-# Initialize the logger
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# Define FastAPI app
-app = FastAPI()
-
-class GUIConfig:
-    def __init__(self) -> None:
-        self.pth_path: str = ""
-        self.index_path: str = ""
-        self.pitch: int = 0
-        self.samplerate: int = 40000
-        self.block_time: float = 1.0  # s
-        self.buffer_num: int = 1
-        self.threhold: int = -60
-        self.crossfade_time: float = 0.05
-        self.extra_time: float = 2.5
-        self.I_noise_reduce = False
-        self.O_noise_reduce = False
-        self.rms_mix_rate = 0.0
-        self.index_rate = 0.3
-        self.f0method = "rmvpe"
-        self.sg_input_device = ""
-        self.sg_output_device = ""
-
-class ConfigData(BaseModel):
-    pth_path: str
-    index_path: str
-    sg_input_device: str
-    sg_output_device: str
-    threhold: int = -60
-    pitch: int = 0
-    index_rate: float = 0.3
-    rms_mix_rate: float = 0.0
-    block_time: float = 0.25
-    crossfade_length: float = 0.05
-    extra_time: float = 2.5
-    n_cpu: int = 4
-    I_noise_reduce: bool = False
-    O_noise_reduce: bool = False
-
-class AudioAPI:
-    def __init__(self) -> None:
-        self.gui_config = GUIConfig()
-        self.config = None  # Initialize Config object as None
-        self.flag_vc = False
-        self.function = "vc"
-        self.delay_time = 0
-        self.rvc = None  # Initialize RVC object as None
-
-    def load(self):
-        input_devices, output_devices, _, _ = self.get_devices()
-        try:
-            with open("configs/config.json", "r", encoding='utf-8') as j:
-                data = json.load(j)
-                data["rmvpe"] = True  # Ensure rmvpe is the only f0method
-                if data["sg_input_device"] not in input_devices:
-                    data["sg_input_device"] = input_devices[sd.default.device[0]]
-                if data["sg_output_device"] not in output_devices:
-                    data["sg_output_device"] = output_devices[sd.default.device[1]]
-        except Exception as e:
-            logger.error(f"Failed to load configuration: {e}")
-            with open("configs/config.json", "w", encoding='utf-8') as j:
-                data = {
-                    "pth_path": " ",
-                    "index_path": " ",
-                    "sg_input_device": input_devices[sd.default.device[0]],
-                    "sg_output_device": output_devices[sd.default.device[1]],
-                    "threhold": "-60",
-                    "pitch": "0",
-                    "index_rate": "0",
-                    "rms_mix_rate": "0",
-                    "block_time": "0.25",
-                    "crossfade_length": "0.05",
-                    "extra_time": "2.5",
-                    "f0method": "rmvpe",
-                    "use_jit": False,
-                }
-                data["rmvpe"] = True  # Ensure rmvpe is the only f0method
-                json.dump(data, j, ensure_ascii=False)
-        return data
-
-    def set_values(self, values):
-        logger.info(f"Setting values: {values}")
-        if not values.pth_path.strip():
-            raise HTTPException(status_code=400, detail="Please select a .pth file")
-        if not values.index_path.strip():
-            raise HTTPException(status_code=400, detail="Please select an index file")
-        self.set_devices(values.sg_input_device, values.sg_output_device)
-        self.config.use_jit = False
-        self.gui_config.pth_path = values.pth_path
-        self.gui_config.index_path = values.index_path
-        self.gui_config.threhold = values.threhold
-        self.gui_config.pitch = values.pitch
-        self.gui_config.block_time = values.block_time
-        self.gui_config.crossfade_time = values.crossfade_length
-        self.gui_config.extra_time = values.extra_time
-        self.gui_config.I_noise_reduce = values.I_noise_reduce
-        self.gui_config.O_noise_reduce = values.O_noise_reduce
-        self.gui_config.rms_mix_rate = values.rms_mix_rate
-        self.gui_config.index_rate = values.index_rate
-        self.gui_config.n_cpu = values.n_cpu
-        self.gui_config.f0method = "rmvpe"
-        return True
-
-    def start_vc(self):
-        torch.cuda.empty_cache()
-        self.flag_vc = True
-        self.rvc = rvc_for_realtime.RVC(
-            self.gui_config.pitch,
-            self.gui_config.pth_path,
-            self.gui_config.index_path,
-            self.gui_config.index_rate,
-            0,
-            0,
-            0,
-            self.config,
-            self.rvc if self.rvc else None,
-        )
-        self.gui_config.samplerate = self.rvc.tgt_sr
-        self.zc = self.rvc.tgt_sr // 100
-        self.block_frame = (
-            int(
-                np.round(
-                    self.gui_config.block_time
-                    * self.gui_config.samplerate
-                    / self.zc
-                )
-            )
-            * self.zc
-        )
-        self.block_frame_16k = 160 * self.block_frame // self.zc
-        self.crossfade_frame = (
-            int(
-                np.round(
-                    self.gui_config.crossfade_time
-                    * self.gui_config.samplerate
-                    / self.zc
-                )
-            )
-            * self.zc
-        )
-        self.sola_search_frame = self.zc
-        self.extra_frame = (
-            int(
-                np.round(
-                    self.gui_config.extra_time
-                    * self.gui_config.samplerate
-                    / self.zc
-                )
-            )
-            * self.zc
-        )
-        self.input_wav = torch.zeros(
-            self.extra_frame + self.crossfade_frame + self.sola_search_frame + self.block_frame,
-            device=self.config.device,
-            dtype=torch.float32,
-        )
-        self.input_wav_res = torch.zeros(
-            160 * self.input_wav.shape[0] // self.zc,
-            device=self.config.device,
-            dtype=torch.float32,
-        )
-        self.pitch = np.zeros(self.input_wav.shape[0] // self.zc, dtype="int32")
-        self.pitchf = np.zeros(self.input_wav.shape[0] // self.zc, dtype="float64")
-        self.sola_buffer = torch.zeros(self.crossfade_frame, device=self.config.device, dtype=torch.float32)
-        self.nr_buffer = self.sola_buffer.clone()
-        self.output_buffer = self.input_wav.clone()
-        self.res_buffer = torch.zeros(2 * self.zc, device=self.config.device, dtype=torch.float32)
-        self.valid_rate = 1 - (self.extra_frame - 1) / self.input_wav.shape[0]
-        self.fade_in_window = (
-            torch.sin(0.5 * np.pi * torch.linspace(0.0, 1.0, steps=self.crossfade_frame, device=self.config.device, dtype=torch.float32)) ** 2
-        )
-        self.fade_out_window = 1 - self.fade_in_window
-        self.resampler = tat.Resample(
-            orig_freq=self.gui_config.samplerate,
-            new_freq=16000,
-            dtype=torch.float32,
-        ).to(self.config.device)
-        self.tg = TorchGate(
-            sr=self.gui_config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9
-        ).to(self.config.device)
-        thread_vc = threading.Thread(target=self.soundinput)
-        thread_vc.start()
-
-    def soundinput(self):
-        channels = 1 if sys.platform == "darwin" else 2
-        with sd.Stream(
-            channels=channels,
-            callback=self.audio_callback,
-            blocksize=self.block_frame,
-            samplerate=self.gui_config.samplerate,
-            dtype="float32",
-        ) as stream:
-            global stream_latency
-            stream_latency = stream.latency[-1]
-            while self.flag_vc:
-                time.sleep(self.gui_config.block_time)
-                logger.info("Audio block passed.")
-        logger.info("Ending VC")
-
-    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
-        start_time = time.perf_counter()
-        indata = librosa.to_mono(indata.T)
-        if self.gui_config.threhold > -60:
-            rms = librosa.feature.rms(y=indata, frame_length=4 * self.zc, hop_length=self.zc)
-            db_threhold = (librosa.amplitude_to_db(rms, ref=1.0)[0] < self.gui_config.threhold)
-            for i in range(db_threhold.shape[0]):
-                if db_threhold[i]:
-                    indata[i * self.zc : (i + 1) * self.zc] = 0
-        self.input_wav[: -self.block_frame] = self.input_wav[self.block_frame :].clone()
-        self.input_wav[-self.block_frame :] = torch.from_numpy(indata).to(self.config.device)
-        self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[self.block_frame_16k :].clone()
-        if self.gui_config.I_noise_reduce and self.function == "vc":
-            input_wav = self.input_wav[-self.crossfade_frame - self.block_frame - 2 * self.zc :]
-            input_wav = self.tg(input_wav.unsqueeze(0), self.input_wav.unsqueeze(0))[0, 2 * self.zc :]
-            input_wav[: self.crossfade_frame] *= self.fade_in_window
-            input_wav[: self.crossfade_frame] += self.nr_buffer * self.fade_out_window
-            self.nr_buffer[:] = input_wav[-self.crossfade_frame :]
-            input_wav = torch.cat((self.res_buffer[:], input_wav[: self.block_frame]))
-            self.res_buffer[:] = input_wav[-2 * self.zc :]
-            self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(input_wav)[160:]
-        else:
-            self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(self.input_wav[-self.block_frame - 2 * self.zc :])[160:]
-        if self.function == "vc":
-            f0_extractor_frame = self.block_frame_16k + 800
-            if self.gui_config.f0method == "rmvpe":
-                f0_extractor_frame = (5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160)
-            infer_wav = self.rvc.infer(
-                self.input_wav_res,
-                self.input_wav_res[-f0_extractor_frame:].cpu().numpy(),
-                self.block_frame_16k,
-                self.valid_rate,
-                self.pitch,
-                self.pitchf,
-                self.gui_config.f0method,
-            )
-            infer_wav = infer_wav[-self.crossfade_frame - self.sola_search_frame - self.block_frame :]
-        else:
-            infer_wav = self.input_wav[-self.crossfade_frame - self.sola_search_frame - self.block_frame :].clone()
-        if (self.gui_config.O_noise_reduce and self.function == "vc") or (self.gui_config.I_noise_reduce and self.function == "im"):
-            self.output_buffer[: -self.block_frame] = self.output_buffer[self.block_frame :].clone()
-            self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :]
-            infer_wav = self.tg(infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)).squeeze(0)
-        if self.gui_config.rms_mix_rate < 1 and self.function == "vc":
-            rms1 = librosa.feature.rms(y=self.input_wav_res[-160 * infer_wav.shape[0] // self.zc :].cpu().numpy(), frame_length=640, hop_length=160)
-            rms1 = torch.from_numpy(rms1).to(self.config.device)
-            rms1 = F.interpolate(rms1.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear", align_corners=True)[0, 0, :-1]
-            rms2 = librosa.feature.rms(y=infer_wav[:].cpu().numpy(), frame_length=4 * self.zc, hop_length=self.zc)
-            rms2 = torch.from_numpy(rms2).to(self.config.device)
-            rms2 = F.interpolate(rms2.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear", align_corners=True)[0, 0, :-1]
-            rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3)
-            infer_wav *= torch.pow(rms1 / rms2, torch.tensor(1 - self.gui_config.rms_mix_rate))
-        conv_input = infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame]
-        cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
-        cor_den = torch.sqrt(F.conv1d(conv_input**2, torch.ones(1, 1, self.crossfade_frame, device=self.config.device)) + 1e-8)
-        if sys.platform == "darwin":
-            _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0])
-            sola_offset = sola_offset.item()
-        else:
-            sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
-        logger.info(f"sola_offset = {sola_offset}")
-        infer_wav = infer_wav[sola_offset : sola_offset + self.block_frame + self.crossfade_frame]
-        infer_wav[: self.crossfade_frame] *= self.fade_in_window
-        infer_wav[: self.crossfade_frame] += self.sola_buffer * self.fade_out_window
-        self.sola_buffer[:] = infer_wav[-self.crossfade_frame :]
-        if sys.platform == "darwin":
-            outdata[:] = infer_wav[: -self.crossfade_frame].cpu().numpy()[:, np.newaxis]
-        else:
-            outdata[:] = infer_wav[: -self.crossfade_frame].repeat(2, 1).t().cpu().numpy()
-        total_time = time.perf_counter() - start_time
-        logger.info(f"Infer time: {total_time:.2f}")
-
-    def get_devices(self, update: bool = True):
-        if update:
-            sd._terminate()
-            sd._initialize()
-        devices = sd.query_devices()
-        hostapis = sd.query_hostapis()
-        for hostapi in hostapis:
-            for device_idx in hostapi["devices"]:
-                devices[device_idx]["hostapi_name"] = hostapi["name"]
-        input_devices = [
-            f"{d['name']} ({d['hostapi_name']})"
-            for d in devices
-            if d["max_input_channels"] > 0
-        ]
-        output_devices = [
-            f"{d['name']} ({d['hostapi_name']})"
-            for d in devices
-            if d["max_output_channels"] > 0
-        ]
-        input_devices_indices = [
-            d["index"] if "index" in d else d["name"]
-            for d in devices
-            if d["max_input_channels"] > 0
-        ]
-        output_devices_indices = [
-            d["index"] if "index" in d else d["name"]
-            for d in devices
-            if d["max_output_channels"] > 0
-        ]
-        return (
-            input_devices,
-            output_devices,
-            input_devices_indices,
-            output_devices_indices,
-        )
-
-    def set_devices(self, input_device, output_device):
-        (
-            input_devices,
-            output_devices,
-            input_device_indices,
-            output_device_indices,
-        ) = self.get_devices()
-        logger.debug(f"Available input devices: {input_devices}")
-        logger.debug(f"Available output devices: {output_devices}")
-        logger.debug(f"Selected input device: {input_device}")
-        logger.debug(f"Selected output device: {output_device}")
-
-        if input_device not in input_devices:
-            logger.error(f"Input device '{input_device}' is not in the list of available devices")
-            raise HTTPException(status_code=400, detail=f"Input device '{input_device}' is not available")
-        
-        if output_device not in output_devices:
-            logger.error(f"Output device '{output_device}' is not in the list of available devices")
-            raise HTTPException(status_code=400, detail=f"Output device '{output_device}' is not available")
-
-        sd.default.device[0] = input_device_indices[input_devices.index(input_device)]
-        sd.default.device[1] = output_device_indices[output_devices.index(output_device)]
-        logger.info(f"Input device set to {sd.default.device[0]}: {input_device}")
-        logger.info(f"Output device set to {sd.default.device[1]}: {output_device}")
-
-audio_api = AudioAPI()
-
-@app.get("/inputDevices", response_model=list)
-def get_input_devices():
-    try:
-        input_devices, _, _, _ = audio_api.get_devices()
-        return input_devices
-    except Exception as e:
-        logger.error(f"Failed to get input devices: {e}")
-        raise HTTPException(status_code=500, detail="Failed to get input devices")
-
-@app.get("/outputDevices", response_model=list)
-def get_output_devices():
-    try:
-        _, output_devices, _, _ = audio_api.get_devices()
-        return output_devices
-    except Exception as e:
-        logger.error(f"Failed to get output devices: {e}")
-        raise HTTPException(status_code=500, detail="Failed to get output devices")
-
-@app.post("/config")
-def configure_audio(config_data: ConfigData):
-    try:
-        logger.info(f"Configuring audio with data: {config_data}")
-        if audio_api.set_values(config_data):
-            settings = config_data.dict()
-            settings["use_jit"] = False
-            settings["f0method"] = "rmvpe"
-            with open("configs/config.json", "w", encoding='utf-8') as j:
-                json.dump(settings, j, ensure_ascii=False)
-            logger.info("Configuration set successfully")
-            return {"message": "Configuration set successfully"}
-    except HTTPException as e:
-        logger.error(f"Configuration error: {e.detail}")
-        raise
-    except Exception as e:
-        logger.error(f"Configuration failed: {e}")
-        raise HTTPException(status_code=400, detail=f"Configuration failed: {e}")
-
-@app.post("/start")
-def start_conversion():
-    try:
-        if not audio_api.flag_vc:
-            audio_api.start_vc()
-            return {"message": "Audio conversion started"}
-        else:
-            logger.warning("Audio conversion already running")
-            raise HTTPException(status_code=400, detail="Audio conversion already running")
-    except HTTPException as e:
-        logger.error(f"Start conversion error: {e.detail}")
-        raise
-    except Exception as e:
-        logger.error(f"Failed to start conversion: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to start conversion: {e}")
-
-@app.post("/stop")
-def stop_conversion():
-    try:
-        if audio_api.flag_vc:
-            audio_api.flag_vc = False
-            global stream_latency
-            stream_latency = -1
-            return {"message": "Audio conversion stopped"}
-        else:
-            logger.warning("Audio conversion not running")
-            raise HTTPException(status_code=400, detail="Audio conversion not running")
-    except HTTPException as e:
-        logger.error(f"Stop conversion error: {e.detail}")
-        raise
-    except Exception as e:
-        logger.error(f"Failed to stop conversion: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to stop conversion: {e}")
-
-if __name__ == "__main__":
-    if sys.platform == "win32":
-        from multiprocessing import freeze_support
-        freeze_support()
-    load_dotenv()
-    os.environ["OMP_NUM_THREADS"] = "4"
-    if sys.platform == "darwin":
-        os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
-    from tools.torchgate import TorchGate
-    import tools.rvc_for_realtime as rvc_for_realtime
-    from configs.config import Config
-    audio_api.config = Config()
-    uvicorn.run(app, host="0.0.0.0", port=6242)
diff --git a/api_240604.py b/api_240604.py
deleted file mode 100644
index 08227ce..0000000
--- a/api_240604.py
+++ /dev/null
@@ -1,565 +0,0 @@
-#api for 240604 release version by Xiaokai
-import os
-import sys
-import json
-import re
-import time
-import librosa
-import torch
-import numpy as np
-import torch.nn.functional as F
-import torchaudio.transforms as tat
-import sounddevice as sd
-from dotenv import load_dotenv
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-import threading
-import uvicorn
-import logging
-from multiprocessing import Queue, Process, cpu_count, freeze_support
-
-# Initialize the logger
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# Define FastAPI app
-app = FastAPI()
-
-class GUIConfig:
-    def __init__(self) -> None:
-        self.pth_path: str = ""
-        self.index_path: str = ""
-        self.pitch: int = 0
-        self.formant: float = 0.0
-        self.sr_type: str = "sr_model"
-        self.block_time: float = 0.25  # s
-        self.threhold: int = -60
-        self.crossfade_time: float = 0.05
-        self.extra_time: float = 2.5
-        self.I_noise_reduce: bool = False
-        self.O_noise_reduce: bool = False
-        self.use_pv: bool = False
-        self.rms_mix_rate: float = 0.0
-        self.index_rate: float = 0.0
-        self.n_cpu: int = 4
-        self.f0method: str = "fcpe"
-        self.sg_input_device: str = ""
-        self.sg_output_device: str = ""
-
-class ConfigData(BaseModel):
-    pth_path: str
-    index_path: str
-    sg_input_device: str
-    sg_output_device: str
-    threhold: int = -60
-    pitch: int = 0
-    formant: float = 0.0
-    index_rate: float = 0.3
-    rms_mix_rate: float = 0.0
-    block_time: float = 0.25
-    crossfade_length: float = 0.05
-    extra_time: float = 2.5
-    n_cpu: int = 4
-    I_noise_reduce: bool = False
-    O_noise_reduce: bool = False
-    use_pv: bool = False
-    f0method: str = "fcpe"
-
-class Harvest(Process):
-    def __init__(self, inp_q, opt_q):
-        super(Harvest, self).__init__()
-        self.inp_q = inp_q
-        self.opt_q = opt_q
-
-    def run(self):
-        import numpy as np
-        import pyworld
-        while True:
-            idx, x, res_f0, n_cpu, ts = self.inp_q.get()
-            f0, t = pyworld.harvest(
-                x.astype(np.double),
-                fs=16000,
-                f0_ceil=1100,
-                f0_floor=50,
-                frame_period=10,
-            )
-            res_f0[idx] = f0
-            if len(res_f0.keys()) >= n_cpu:
-                self.opt_q.put(ts)
-
-class AudioAPI:
-    def __init__(self) -> None:
-        self.gui_config = GUIConfig()
-        self.config = None  # Initialize Config object as None
-        self.flag_vc = False
-        self.function = "vc"
-        self.delay_time = 0
-        self.rvc = None  # Initialize RVC object as None
-        self.inp_q = None
-        self.opt_q = None
-        self.n_cpu = min(cpu_count(), 8)
-
-    def initialize_queues(self):
-        self.inp_q = Queue()
-        self.opt_q = Queue()
-        for _ in range(self.n_cpu):
-            p = Harvest(self.inp_q, self.opt_q)
-            p.daemon = True
-            p.start()
-
-    def load(self):
-        input_devices, output_devices, _, _ = self.get_devices()
-        try:
-            with open("configs/config.json", "r", encoding='utf-8') as j:
-                data = json.load(j)
-                if data["sg_input_device"] not in input_devices:
-                    data["sg_input_device"] = input_devices[sd.default.device[0]]
-                if data["sg_output_device"] not in output_devices:
-                    data["sg_output_device"] = output_devices[sd.default.device[1]]
-        except Exception as e:
-            logger.error(f"Failed to load configuration: {e}")
-            with open("configs/config.json", "w", encoding='utf-8') as j:
-                data = {
-                    "pth_path": "",
-                    "index_path": "",
-                    "sg_input_device": input_devices[sd.default.device[0]],
-                    "sg_output_device": output_devices[sd.default.device[1]],
-                    "threhold": -60,
-                    "pitch": 0,
-                    "formant": 0.0,
-                    "index_rate": 0,
-                    "rms_mix_rate": 0,
-                    "block_time": 0.25,
-                    "crossfade_length": 0.05,
-                    "extra_time": 2.5,
-                    "n_cpu": 4,
-                    "f0method": "fcpe",
-                    "use_jit": False,
-                    "use_pv": False,
-                }
-                json.dump(data, j, ensure_ascii=False)
-        return data
-
-    def set_values(self, values):
-        logger.info(f"Setting values: {values}")
-        if not values.pth_path.strip():
-            raise HTTPException(status_code=400, detail="Please select a .pth file")
-        if not values.index_path.strip():
-            raise HTTPException(status_code=400, detail="Please select an index file")
-        self.set_devices(values.sg_input_device, values.sg_output_device)
-        self.config.use_jit = False
-        self.gui_config.pth_path = values.pth_path
-        self.gui_config.index_path = values.index_path
-        self.gui_config.threhold = values.threhold
-        self.gui_config.pitch = values.pitch
-        self.gui_config.formant = values.formant
-        self.gui_config.block_time = values.block_time
-        self.gui_config.crossfade_time = values.crossfade_length
-        self.gui_config.extra_time = values.extra_time
-        self.gui_config.I_noise_reduce = values.I_noise_reduce
-        self.gui_config.O_noise_reduce = values.O_noise_reduce
-        self.gui_config.rms_mix_rate = values.rms_mix_rate
-        self.gui_config.index_rate = values.index_rate
-        self.gui_config.n_cpu = values.n_cpu
-        self.gui_config.use_pv = values.use_pv
-        self.gui_config.f0method = values.f0method
-        return True
-
-    def start_vc(self):
-        torch.cuda.empty_cache()
-        self.flag_vc = True
-        self.rvc = rvc_for_realtime.RVC(
-            self.gui_config.pitch,
-            self.gui_config.pth_path,
-            self.gui_config.index_path,
-            self.gui_config.index_rate,
-            self.gui_config.n_cpu,
-            self.inp_q,
-            self.opt_q,
-            self.config,
-            self.rvc if self.rvc else None,
-        )
-        self.gui_config.samplerate = (
-            self.rvc.tgt_sr
-            if self.gui_config.sr_type == "sr_model"
-            else self.get_device_samplerate()
-        )
-        self.zc = self.gui_config.samplerate // 100
-        self.block_frame = (
-            int(
-                np.round(
-                    self.gui_config.block_time
-                    * self.gui_config.samplerate
-                    / self.zc
-                )
-            )
-            * self.zc
-        )
-        self.block_frame_16k = 160 * self.block_frame // self.zc
-        self.crossfade_frame = (
-            int(
-                np.round(
-                    self.gui_config.crossfade_time
-                    * self.gui_config.samplerate
-                    / self.zc
-                )
-            )
-            * self.zc
-        )
-        self.sola_buffer_frame = min(self.crossfade_frame, 4 * self.zc)
-        self.sola_search_frame = self.zc
-        self.extra_frame = (
-            int(
-                np.round(
-                    self.gui_config.extra_time
-                    * self.gui_config.samplerate
-                    / self.zc
-                )
-            )
-            * self.zc
-        )
-        self.input_wav = torch.zeros(
-            self.extra_frame
-            + self.crossfade_frame
-            + self.sola_search_frame
-            + self.block_frame,
-            device=self.config.device,
-            dtype=torch.float32,
-        )
-        self.input_wav_denoise = self.input_wav.clone()
-        self.input_wav_res = torch.zeros(
-            160 * self.input_wav.shape[0] // self.zc,
-            device=self.config.device,
-            dtype=torch.float32,
-        )
-        self.rms_buffer = np.zeros(4 * self.zc, dtype="float32")
-        self.sola_buffer = torch.zeros(
-            self.sola_buffer_frame, device=self.config.device, dtype=torch.float32
-        )
-        self.nr_buffer = self.sola_buffer.clone()
-        self.output_buffer = self.input_wav.clone()
-        self.skip_head = self.extra_frame // self.zc
-        self.return_length = (
-            self.block_frame + self.sola_buffer_frame + self.sola_search_frame
-        ) // self.zc
-        self.fade_in_window = (
-            torch.sin(
-                0.5
-                * np.pi
-                * torch.linspace(
-                    0.0,
-                    1.0,
-                    steps=self.sola_buffer_frame,
-                    device=self.config.device,
-                    dtype=torch.float32,
-                )
-            )
-            ** 2
-        )
-        self.fade_out_window = 1 - self.fade_in_window
-        self.resampler = tat.Resample(
-            orig_freq=self.gui_config.samplerate,
-            new_freq=16000,
-            dtype=torch.float32,
-        ).to(self.config.device)
-        if self.rvc.tgt_sr != self.gui_config.samplerate:
-            self.resampler2 = tat.Resample(
-                orig_freq=self.rvc.tgt_sr,
-                new_freq=self.gui_config.samplerate,
-                dtype=torch.float32,
-            ).to(self.config.device)
-        else:
-            self.resampler2 = None
-        self.tg = TorchGate(
-            sr=self.gui_config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9
-        ).to(self.config.device)
-        thread_vc = threading.Thread(target=self.soundinput)
-        thread_vc.start()
-
-    def soundinput(self):
-        channels = 1 if sys.platform == "darwin" else 2
-        with sd.Stream(
-            channels=channels,
-            callback=self.audio_callback,
-            blocksize=self.block_frame,
-            samplerate=self.gui_config.samplerate,
-            dtype="float32",
-        ) as stream:
-            global stream_latency
-            stream_latency = stream.latency[-1]
-            while self.flag_vc:
-                time.sleep(self.gui_config.block_time)
-                logger.info("Audio block passed.")
-        logger.info("Ending VC")
-
-    def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status):
-        start_time = time.perf_counter()
-        indata = librosa.to_mono(indata.T)
-        if self.gui_config.threhold > -60:
-            indata = np.append(self.rms_buffer, indata)
-            rms = librosa.feature.rms(y=indata, frame_length=4 * self.zc, hop_length=self.zc)[:, 2:]
-            self.rms_buffer[:] = indata[-4 * self.zc :]
-            indata = indata[2 * self.zc - self.zc // 2 :]
-            db_threhold = (
-                librosa.amplitude_to_db(rms, ref=1.0)[0] < self.gui_config.threhold
-            )
-            for i in range(db_threhold.shape[0]):
-                if db_threhold[i]:
-                    indata[i * self.zc : (i + 1) * self.zc] = 0
-            indata = indata[self.zc // 2 :]
-        self.input_wav[: -self.block_frame] = self.input_wav[self.block_frame :].clone()
-        self.input_wav[-indata.shape[0] :] = torch.from_numpy(indata).to(self.config.device)
-        self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[self.block_frame_16k :].clone()
-        # input noise reduction and resampling
-        if self.gui_config.I_noise_reduce:
-            self.input_wav_denoise[: -self.block_frame] = self.input_wav_denoise[self.block_frame :].clone()
-            input_wav = self.input_wav[-self.sola_buffer_frame - self.block_frame :]
-            input_wav = self.tg(input_wav.unsqueeze(0), self.input_wav.unsqueeze(0)).squeeze(0)
-            input_wav[: self.sola_buffer_frame] *= self.fade_in_window
-            input_wav[: self.sola_buffer_frame] += self.nr_buffer * self.fade_out_window
-            self.input_wav_denoise[-self.block_frame :] = input_wav[: self.block_frame]
-            self.nr_buffer[:] = input_wav[self.block_frame :]
-            self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
-                self.input_wav_denoise[-self.block_frame - 2 * self.zc :]
-            )[160:]
-        else:
-            self.input_wav_res[-160 * (indata.shape[0] // self.zc + 1) :] = (
-                self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[160:]
-            )
-        # infer
-        if self.function == "vc":
-            infer_wav = self.rvc.infer(
-                self.input_wav_res,
-                self.block_frame_16k,
-                self.skip_head,
-                self.return_length,
-                self.gui_config.f0method,
-            )
-            if self.resampler2 is not None:
-                infer_wav = self.resampler2(infer_wav)
-        elif self.gui_config.I_noise_reduce:
-            infer_wav = self.input_wav_denoise[self.extra_frame :].clone()
-        else:
-            infer_wav = self.input_wav[self.extra_frame :].clone()
-        # output noise reduction
-        if self.gui_config.O_noise_reduce and self.function == "vc":
-            self.output_buffer[: -self.block_frame] = self.output_buffer[self.block_frame :].clone()
-            self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :]
-            infer_wav = self.tg(infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)).squeeze(0)
-        # volume envelop mixing
-        if self.gui_config.rms_mix_rate < 1 and self.function == "vc":
-            if self.gui_config.I_noise_reduce:
-                input_wav = self.input_wav_denoise[self.extra_frame :]
-            else:
-                input_wav = self.input_wav[self.extra_frame :]
-            rms1 = librosa.feature.rms(
-                y=input_wav[: infer_wav.shape[0]].cpu().numpy(),
-                frame_length=4 * self.zc,
-                hop_length=self.zc,
-            )
-            rms1 = torch.from_numpy(rms1).to(self.config.device)
-            rms1 = F.interpolate(
-                rms1.unsqueeze(0),
-                size=infer_wav.shape[0] + 1,
-                mode="linear",
-                align_corners=True,
-            )[0, 0, :-1]
-            rms2 = librosa.feature.rms(
-                y=infer_wav[:].cpu().numpy(),
-                frame_length=4 * self.zc,
-                hop_length=self.zc,
-            )
-            rms2 = torch.from_numpy(rms2).to(self.config.device)
-            rms2 = F.interpolate(
-                rms2.unsqueeze(0),
-                size=infer_wav.shape[0] + 1,
-                mode="linear",
-                align_corners=True,
-            )[0, 0, :-1]
-            rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3)
-            infer_wav *= torch.pow(
-                rms1 / rms2, torch.tensor(1 - self.gui_config.rms_mix_rate)
-            )
-        # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
-        conv_input = infer_wav[None, None, : self.sola_buffer_frame + self.sola_search_frame]
-        cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
-        cor_den = torch.sqrt(
-            F.conv1d(
-                conv_input**2,
-                torch.ones(1, 1, self.sola_buffer_frame, device=self.config.device),
-            )
-            + 1e-8
-        )
-        if sys.platform == "darwin":
-            _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0])
-            sola_offset = sola_offset.item()
-        else:
-            sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
-        logger.info(f"sola_offset = {sola_offset}")
-        infer_wav = infer_wav[sola_offset:]
-        if "privateuseone" in str(self.config.device) or not self.gui_config.use_pv:
-            infer_wav[: self.sola_buffer_frame] *= self.fade_in_window
-            infer_wav[: self.sola_buffer_frame] += self.sola_buffer * self.fade_out_window
-        else:
-            infer_wav[: self.sola_buffer_frame] = phase_vocoder(
-                self.sola_buffer,
-                infer_wav[: self.sola_buffer_frame],
-                self.fade_out_window,
-                self.fade_in_window,
-            )
-        self.sola_buffer[:] = infer_wav[
-            self.block_frame : self.block_frame + self.sola_buffer_frame
-        ]
-        if sys.platform == "darwin":
-            outdata[:] = infer_wav[: self.block_frame].cpu().numpy()[:, np.newaxis]
-        else:
-            outdata[:] = infer_wav[: self.block_frame].repeat(2, 1).t().cpu().numpy()
-        total_time = time.perf_counter() - start_time
-        logger.info(f"Infer time: {total_time:.2f}")
-
-    def get_devices(self, update: bool = True):
-        if update:
-            sd._terminate()
-            sd._initialize()
-        devices = sd.query_devices()
-        hostapis = sd.query_hostapis()
-        for hostapi in hostapis:
-            for device_idx in hostapi["devices"]:
-                devices[device_idx]["hostapi_name"] = hostapi["name"]
-        input_devices = [
-            f"{d['name']} ({d['hostapi_name']})"
-            for d in devices
-            if d["max_input_channels"] > 0
-        ]
-        output_devices = [
-            f"{d['name']} ({d['hostapi_name']})"
-            for d in devices
-            if d["max_output_channels"] > 0
-        ]
-        input_devices_indices = [
-            d["index"] if "index" in d else d["name"]
-            for d in devices
-            if d["max_input_channels"] > 0
-        ]
-        output_devices_indices = [
-            d["index"] if "index" in d else d["name"]
-            for d in devices
-            if d["max_output_channels"] > 0
-        ]
-        return (
-            input_devices,
-            output_devices,
-            input_devices_indices,
-            output_devices_indices,
-        )
-
-    def set_devices(self, input_device, output_device):
-        (
-            input_devices,
-            output_devices,
-            input_device_indices,
-            output_device_indices,
-        ) = self.get_devices()
-        logger.debug(f"Available input devices: {input_devices}")
-        logger.debug(f"Available output devices: {output_devices}")
-        logger.debug(f"Selected input device: {input_device}")
-        logger.debug(f"Selected output device: {output_device}")
-
-        if input_device not in input_devices:
-            logger.error(f"Input device '{input_device}' is not in the list of available devices")
-            raise HTTPException(status_code=400, detail=f"Input device '{input_device}' is not available")
-        
-        if output_device not in output_devices:
-            logger.error(f"Output device '{output_device}' is not in the list of available devices")
-            raise HTTPException(status_code=400, detail=f"Output device '{output_device}' is not available")
-
-        sd.default.device[0] = input_device_indices[input_devices.index(input_device)]
-        sd.default.device[1] = output_device_indices[output_devices.index(output_device)]
-        logger.info(f"Input device set to {sd.default.device[0]}: {input_device}")
-        logger.info(f"Output device set to {sd.default.device[1]}: {output_device}")
-
-audio_api = AudioAPI()
-
-@app.get("/inputDevices", response_model=list)
-def get_input_devices():
-    try:
-        input_devices, _, _, _ = audio_api.get_devices()
-        return input_devices
-    except Exception as e:
-        logger.error(f"Failed to get input devices: {e}")
-        raise HTTPException(status_code=500, detail="Failed to get input devices")
-
-@app.get("/outputDevices", response_model=list)
-def get_output_devices():
-    try:
-        _, output_devices, _, _ = audio_api.get_devices()
-        return output_devices
-    except Exception as e:
-        logger.error(f"Failed to get output devices: {e}")
-        raise HTTPException(status_code=500, detail="Failed to get output devices")
-
-@app.post("/config")
-def configure_audio(config_data: ConfigData):
-    try:
-        logger.info(f"Configuring audio with data: {config_data}")
-        if audio_api.set_values(config_data):
-            settings = config_data.dict()
-            settings["use_jit"] = False
-            with open("configs/config.json", "w", encoding='utf-8') as j:
-                json.dump(settings, j, ensure_ascii=False)
-            logger.info("Configuration set successfully")
-            return {"message": "Configuration set successfully"}
-    except HTTPException as e:
-        logger.error(f"Configuration error: {e.detail}")
-        raise
-    except Exception as e:
-        logger.error(f"Configuration failed: {e}")
-        raise HTTPException(status_code=400, detail=f"Configuration failed: {e}")
-
-@app.post("/start")
-def start_conversion():
-    try:
-        if not audio_api.flag_vc:
-            audio_api.start_vc()
-            return {"message": "Audio conversion started"}
-        else:
-            logger.warning("Audio conversion already running")
-            raise HTTPException(status_code=400, detail="Audio conversion already running")
-    except HTTPException as e:
-        logger.error(f"Start conversion error: {e.detail}")
-        raise
-    except Exception as e:
-        logger.error(f"Failed to start conversion: {e}")
-        raise HTTPException(status_code=500, detail="Failed to start conversion: {e}")
-
-@app.post("/stop")
-def stop_conversion():
-    try:
-        if audio_api.flag_vc:
-            audio_api.flag_vc = False
-            global stream_latency
-            stream_latency = -1
-            return {"message": "Audio conversion stopped"}
-        else:
-            logger.warning("Audio conversion not running")
-            raise HTTPException(status_code=400, detail="Audio conversion not running")
-    except HTTPException as e:
-        logger.error(f"Stop conversion error: {e.detail}")
-        raise
-    except Exception as e:
-        logger.error(f"Failed to stop conversion: {e}")
-        raise HTTPException(status_code=500, detail="Failed to stop conversion: {e}")
-
-if __name__ == "__main__":
-    if sys.platform == "win32":
-        freeze_support()
-    load_dotenv()
-    os.environ["OMP_NUM_THREADS"] = "4"
-    if sys.platform == "darwin":
-        os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
-    from tools.torchgate import TorchGate
-    import tools.rvc_for_realtime as rvc_for_realtime
-    from configs.config import Config
-    audio_api.config = Config()
-    audio_api.initialize_queues()
-    uvicorn.run(app, host="0.0.0.0", port=6242)
diff --git a/configure_gpu_deps.py b/configure_gpu_deps.py
index 8cf36d4..ec326be 100644
--- a/configure_gpu_deps.py
+++ b/configure_gpu_deps.py
@@ -65,13 +65,15 @@ def process_lines(lines, target_gpu):
     return output_lines
 
 def main():
-    if len(sys.argv) != 3:
-        print("Usage: python configure_gpu_deps.py <pyproject.toml> <gpu_type>")
+    if len(sys.argv) != 2:
+        print("Usage: python configure_gpu_deps.py <gpu_type>")
         print("  where <gpu_type> is either 'nvidia' or 'amd'")
         sys.exit(1)
 
-    toml_path = sys.argv[1]
-    gpu_type = sys.argv[2].lower()
+    gpu_type = sys.argv[1].lower()
+    toml_path = "pyproject.toml"
+    with open(toml_path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
     if gpu_type not in {"nvidia", "amd"}:
         print("gpu_type must be either 'nvidia' or 'amd'")
         sys.exit(1)
diff --git a/environment_dml.yaml b/environment_dml.yaml
deleted file mode 100644
index 0fb3f22..0000000
--- a/environment_dml.yaml
+++ /dev/null
@@ -1,186 +0,0 @@
-name: pydml
-channels:
-  - pytorch
-  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
-  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
-  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
-  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
-  - defaults
-  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/fastai/
-  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
-  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/
-dependencies:
-  - abseil-cpp=20211102.0=hd77b12b_0
-  - absl-py=1.3.0=py310haa95532_0
-  - aiohttp=3.8.3=py310h2bbff1b_0
-  - aiosignal=1.2.0=pyhd3eb1b0_0
-  - async-timeout=4.0.2=py310haa95532_0
-  - attrs=22.1.0=py310haa95532_0
-  - blas=1.0=mkl
-  - blinker=1.4=py310haa95532_0
-  - bottleneck=1.3.5=py310h9128911_0
-  - brotli=1.0.9=h2bbff1b_7
-  - brotli-bin=1.0.9=h2bbff1b_7
-  - brotlipy=0.7.0=py310h2bbff1b_1002
-  - bzip2=1.0.8=he774522_0
-  - c-ares=1.19.0=h2bbff1b_0
-  - ca-certificates=2023.05.30=haa95532_0
-  - cachetools=4.2.2=pyhd3eb1b0_0
-  - certifi=2023.5.7=py310haa95532_0
-  - cffi=1.15.1=py310h2bbff1b_3
-  - charset-normalizer=2.0.4=pyhd3eb1b0_0
-  - click=8.0.4=py310haa95532_0
-  - colorama=0.4.6=py310haa95532_0
-  - contourpy=1.0.5=py310h59b6b97_0
-  - cryptography=39.0.1=py310h21b164f_0
-  - cycler=0.11.0=pyhd3eb1b0_0
-  - fonttools=4.25.0=pyhd3eb1b0_0
-  - freetype=2.12.1=ha860e81_0
-  - frozenlist=1.3.3=py310h2bbff1b_0
-  - giflib=5.2.1=h8cc25b3_3
-  - glib=2.69.1=h5dc1a3c_2
-  - google-auth=2.6.0=pyhd3eb1b0_0
-  - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
-  - grpc-cpp=1.48.2=hf108199_0
-  - grpcio=1.48.2=py310hf108199_0
-  - gst-plugins-base=1.18.5=h9e645db_0
-  - gstreamer=1.18.5=hd78058f_0
-  - icu=58.2=ha925a31_3
-  - idna=3.4=py310haa95532_0
-  - intel-openmp=2023.1.0=h59b6b97_46319
-  - jpeg=9e=h2bbff1b_1
-  - kiwisolver=1.4.4=py310hd77b12b_0
-  - krb5=1.19.4=h5b6d351_0
-  - lerc=3.0=hd77b12b_0
-  - libbrotlicommon=1.0.9=h2bbff1b_7
-  - libbrotlidec=1.0.9=h2bbff1b_7
-  - libbrotlienc=1.0.9=h2bbff1b_7
-  - libclang=14.0.6=default_hb5a9fac_1
-  - libclang13=14.0.6=default_h8e68704_1
-  - libdeflate=1.17=h2bbff1b_0
-  - libffi=3.4.4=hd77b12b_0
-  - libiconv=1.16=h2bbff1b_2
-  - libogg=1.3.5=h2bbff1b_1
-  - libpng=1.6.39=h8cc25b3_0
-  - libprotobuf=3.20.3=h23ce68f_0
-  - libtiff=4.5.0=h6c2663c_2
-  - libuv=1.44.2=h2bbff1b_0
-  - libvorbis=1.3.7=he774522_0
-  - libwebp=1.2.4=hbc33d0d_1
-  - libwebp-base=1.2.4=h2bbff1b_1
-  - libxml2=2.10.3=h0ad7f3c_0
-  - libxslt=1.1.37=h2bbff1b_0
-  - lz4-c=1.9.4=h2bbff1b_0
-  - markdown=3.4.1=py310haa95532_0
-  - markupsafe=2.1.1=py310h2bbff1b_0
-  - matplotlib=3.7.1=py310haa95532_1
-  - matplotlib-base=3.7.1=py310h4ed8f06_1
-  - mkl=2023.1.0=h8bd8f75_46356
-  - mkl-service=2.4.0=py310h2bbff1b_1
-  - mkl_fft=1.3.6=py310h4ed8f06_1
-  - mkl_random=1.2.2=py310h4ed8f06_1
-  - multidict=6.0.2=py310h2bbff1b_0
-  - munkres=1.1.4=py_0
-  - numexpr=2.8.4=py310h2cd9be0_1
-  - numpy=1.24.3=py310h055cbcc_1
-  - numpy-base=1.24.3=py310h65a83cf_1
-  - oauthlib=3.2.2=py310haa95532_0
-  - openssl=1.1.1t=h2bbff1b_0
-  - packaging=23.0=py310haa95532_0
-  - pandas=1.5.3=py310h4ed8f06_0
-  - pcre=8.45=hd77b12b_0
-  - pillow=9.4.0=py310hd77b12b_0
-  - pip=23.0.1=py310haa95532_0
-  - ply=3.11=py310haa95532_0
-  - protobuf=3.20.3=py310hd77b12b_0
-  - pyasn1=0.4.8=pyhd3eb1b0_0
-  - pyasn1-modules=0.2.8=py_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyjwt=2.4.0=py310haa95532_0
-  - pyopenssl=23.0.0=py310haa95532_0
-  - pyparsing=3.0.9=py310haa95532_0
-  - pyqt=5.15.7=py310hd77b12b_0
-  - pyqt5-sip=12.11.0=py310hd77b12b_0
-  - pysocks=1.7.1=py310haa95532_0
-  - python=3.10.11=h966fe2a_2
-  - python-dateutil=2.8.2=pyhd3eb1b0_0
-  - pytorch-mutex=1.0=cpu
-  - pytz=2022.7=py310haa95532_0
-  - pyyaml=6.0=py310h2bbff1b_1
-  - qt-main=5.15.2=he8e5bd7_8
-  - qt-webengine=5.15.9=hb9a9bb5_5
-  - qtwebkit=5.212=h2bbfb41_5
-  - re2=2022.04.01=hd77b12b_0
-  - requests=2.29.0=py310haa95532_0
-  - requests-oauthlib=1.3.0=py_0
-  - rsa=4.7.2=pyhd3eb1b0_1
-  - setuptools=67.8.0=py310haa95532_0
-  - sip=6.6.2=py310hd77b12b_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.41.2=h2bbff1b_0
-  - tbb=2021.8.0=h59b6b97_0
-  - tensorboard=2.10.0=py310haa95532_0
-  - tensorboard-data-server=0.6.1=py310haa95532_0
-  - tensorboard-plugin-wit=1.8.1=py310haa95532_0
-  - tk=8.6.12=h2bbff1b_0
-  - toml=0.10.2=pyhd3eb1b0_0
-  - tornado=6.2=py310h2bbff1b_0
-  - tqdm=4.65.0=py310h9909e9c_0
-  - typing_extensions=4.5.0=py310haa95532_0
-  - tzdata=2023c=h04d1e81_0
-  - urllib3=1.26.16=py310haa95532_0
-  - vc=14.2=h21ff451_1
-  - vs2015_runtime=14.27.29016=h5e58377_2
-  - werkzeug=2.2.3=py310haa95532_0
-  - wheel=0.38.4=py310haa95532_0
-  - win_inet_pton=1.1.0=py310haa95532_0
-  - xz=5.4.2=h8cc25b3_0
-  - yaml=0.2.5=he774522_0
-  - yarl=1.8.1=py310h2bbff1b_0
-  - zlib=1.2.13=h8cc25b3_0
-  - zstd=1.5.5=hd43e919_0
-  - pip:
-      - antlr4-python3-runtime==4.8
-      - appdirs==1.4.4
-      - audioread==3.0.0
-      - bitarray==2.7.4
-      - cython==0.29.35
-      - decorator==5.1.1
-      - fairseq==0.12.2
-      - faiss-cpu==1.7.4
-      - filelock==3.12.0
-      - hydra-core==1.0.7
-      - jinja2==3.1.2
-      - joblib==1.2.0
-      - lazy-loader==0.2
-      - librosa==0.10.0.post2
-      - llvmlite==0.40.0
-      - lxml==4.9.2
-      - mpmath==1.3.0
-      - msgpack==1.0.5
-      - networkx==3.1
-      - noisereduce==2.0.1
-      - numba==0.57.0
-      - omegaconf==2.0.6
-      - opencv-python==4.7.0.72
-      - pooch==1.6.0
-      - portalocker==2.7.0
-      - pysimplegui==4.60.5
-      - pywin32==306
-      - pyworld==0.3.3
-      - regex==2023.5.5
-      - sacrebleu==2.3.1
-      - scikit-learn==1.2.2
-      - scipy==1.10.1
-      - sounddevice==0.4.6
-      - soundfile==0.12.1
-      - soxr==0.3.5
-      - sympy==1.12
-      - tabulate==0.9.0
-      - threadpoolctl==3.1.0
-      - torch==2.0.0
-      - torch-directml==0.2.0.dev230426
-      - torchaudio==2.0.1
-      - torchvision==0.15.1
-      - wget==3.2
-prefix: D:\ProgramData\anaconda3_\envs\pydml
diff --git a/go-realtime-gui-dml.bat b/go-realtime-gui-dml.bat
deleted file mode 100644
index 2c87633..0000000
--- a/go-realtime-gui-dml.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-runtime\python.exe gui_v1.py --pycmd runtime\python.exe --dml
-pause
diff --git a/go-realtime-gui.bat b/go-realtime-gui.bat
deleted file mode 100644
index 8c08290..0000000
--- a/go-realtime-gui.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-runtime\python.exe gui_v1.py
-pause
diff --git a/go-web-dml.bat b/go-web-dml.bat
deleted file mode 100644
index fc5e708..0000000
--- a/go-web-dml.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897 --dml
-pause
diff --git a/go-web.bat b/go-web.bat
deleted file mode 100644
index db1dec5..0000000
--- a/go-web.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897
-pause
diff --git a/gui_v1.py b/gui_v1.py
deleted file mode 100644
index c5e7179..0000000
--- a/gui_v1.py
+++ /dev/null
@@ -1,1070 +0,0 @@
-import os
-import sys
-from dotenv import load_dotenv
-import shutil
-
-load_dotenv()
-
-os.environ["OMP_NUM_THREADS"] = "4"
-if sys.platform == "darwin":
-    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
-
-now_dir = os.getcwd()
-sys.path.append(now_dir)
-import multiprocessing
-
-flag_vc = False
-
-
-def printt(strr, *args):
-    if len(args) == 0:
-        print(strr)
-    else:
-        print(strr % args)
-
-
-def phase_vocoder(a, b, fade_out, fade_in):
-    window = torch.sqrt(fade_out * fade_in)
-    fa = torch.fft.rfft(a * window)
-    fb = torch.fft.rfft(b * window)
-    absab = torch.abs(fa) + torch.abs(fb)
-    n = a.shape[0]
-    if n % 2 == 0:
-        absab[1:-1] *= 2
-    else:
-        absab[1:] *= 2
-    phia = torch.angle(fa)
-    phib = torch.angle(fb)
-    deltaphase = phib - phia
-    deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5)
-    w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase
-    t = torch.arange(n).unsqueeze(-1).to(a) / n
-    result = (
-        a * (fade_out**2)
-        + b * (fade_in**2)
-        + torch.sum(absab * torch.cos(w * t + phia), -1) * window / n
-    )
-    return result
-
-
-class Harvest(multiprocessing.Process):
-    def __init__(self, inp_q, opt_q):
-        multiprocessing.Process.__init__(self)
-        self.inp_q = inp_q
-        self.opt_q = opt_q
-
-    def run(self):
-        import numpy as np
-        import pyworld
-
-        while 1:
-            idx, x, res_f0, n_cpu, ts = self.inp_q.get()
-            f0, t = pyworld.harvest(
-                x.astype(np.double),
-                fs=16000,
-                f0_ceil=1100,
-                f0_floor=50,
-                frame_period=10,
-            )
-            res_f0[idx] = f0
-            if len(res_f0.keys()) >= n_cpu:
-                self.opt_q.put(ts)
-
-
-if __name__ == "__main__":
-    import json
-    import multiprocessing
-    import re
-    import threading
-    import time
-    import traceback
-    from multiprocessing import Queue, cpu_count
-    from queue import Empty
-
-    import librosa
-    from tools.torchgate import TorchGate
-    import numpy as np
-    import FreeSimpleGUI as sg
-    import sounddevice as sd
-    import torch
-    import torch.nn.functional as F
-    import torchaudio.transforms as tat
-
-    from infer.lib import rtrvc as rvc_for_realtime
-    from i18n.i18n import I18nAuto
-    from configs.config import Config
-
-    i18n = I18nAuto()
-
-    # device = rvc_for_realtime.config.device
-    # device = torch.device(
-    #     "cuda"
-    #     if torch.cuda.is_available()
-    #     else ("mps" if torch.backends.mps.is_available() else "cpu")
-    # )
-    current_dir = os.getcwd()
-    inp_q = Queue()
-    opt_q = Queue()
-    n_cpu = min(cpu_count(), 8)
-    for _ in range(n_cpu):
-        p = Harvest(inp_q, opt_q)
-        p.daemon = True
-        p.start()
-
-    class GUIConfig:
-        def __init__(self) -> None:
-            self.pth_path: str = ""
-            self.index_path: str = ""
-            self.pitch: int = 0
-            self.formant=0.0
-            self.sr_type: str = "sr_model"
-            self.block_time: float = 0.25  # s
-            self.threhold: int = -60
-            self.crossfade_time: float = 0.05
-            self.extra_time: float = 2.5
-            self.I_noise_reduce: bool = False
-            self.O_noise_reduce: bool = False
-            self.use_pv: bool = False
-            self.rms_mix_rate: float = 0.0
-            self.index_rate: float = 0.0
-            self.n_cpu: int = min(n_cpu, 4)
-            self.f0method: str = "fcpe"
-            self.sg_hostapi: str = ""
-            self.wasapi_exclusive: bool = False
-            self.sg_input_device: str = ""
-            self.sg_output_device: str = ""
-
-    class GUI:
-        def __init__(self) -> None:
-            self.gui_config = GUIConfig()
-            self.config = Config()
-            self.function = "vc"
-            self.delay_time = 0
-            self.hostapis = None
-            self.input_devices = None
-            self.output_devices = None
-            self.input_devices_indices = None
-            self.output_devices_indices = None
-            self.stream = None
-            self.update_devices()
-            self.launcher()
-
-        def load(self):
-            try:
-                if not os.path.exists("configs/inuse/config.json"):
-                    shutil.copy("configs/config.json", "configs/inuse/config.json")
-                with open("configs/inuse/config.json", "r") as j:
-                    data = json.load(j)
-                    data["sr_model"] = data["sr_type"] == "sr_model"
-                    data["sr_device"] = data["sr_type"] == "sr_device"
-                    data["pm"] = data["f0method"] == "pm"
-                    data["harvest"] = data["f0method"] == "harvest"
-                    data["crepe"] = data["f0method"] == "crepe"
-                    data["rmvpe"] = data["f0method"] == "rmvpe"
-                    data["fcpe"] = data["f0method"] == "fcpe"
-                    if data["sg_hostapi"] in self.hostapis:
-                        self.update_devices(hostapi_name=data["sg_hostapi"])
-                        if (
-                            data["sg_input_device"] not in self.input_devices
-                            or data["sg_output_device"] not in self.output_devices
-                        ):
-                            self.update_devices()
-                            data["sg_hostapi"] = self.hostapis[0]
-                            data["sg_input_device"] = self.input_devices[
-                                self.input_devices_indices.index(sd.default.device[0])
-                            ]
-                            data["sg_output_device"] = self.output_devices[
-                                self.output_devices_indices.index(sd.default.device[1])
-                            ]
-                    else:
-                        data["sg_hostapi"] = self.hostapis[0]
-                        data["sg_input_device"] = self.input_devices[
-                            self.input_devices_indices.index(sd.default.device[0])
-                        ]
-                        data["sg_output_device"] = self.output_devices[
-                            self.output_devices_indices.index(sd.default.device[1])
-                        ]
-            except:
-                with open("configs/inuse/config.json", "w") as j:
-                    data = {
-                        "pth_path": "",
-                        "index_path": "",
-                        "sg_hostapi": self.hostapis[0],
-                        "sg_wasapi_exclusive": False,
-                        "sg_input_device": self.input_devices[
-                            self.input_devices_indices.index(sd.default.device[0])
-                        ],
-                        "sg_output_device": self.output_devices[
-                            self.output_devices_indices.index(sd.default.device[1])
-                        ],
-                        "sr_type": "sr_model",
-                        "threhold": -60,
-                        "pitch": 0,
-                        "formant": 0.0,
-                        "index_rate": 0,
-                        "rms_mix_rate": 0,
-                        "block_time": 0.25,
-                        "crossfade_length": 0.05,
-                        "extra_time": 2.5,
-                        "n_cpu": 4,
-                        "f0method": "rmvpe",
-                        "use_jit": False,
-                        "use_pv": False,
-                    }
-                    data["sr_model"] = data["sr_type"] == "sr_model"
-                    data["sr_device"] = data["sr_type"] == "sr_device"
-                    data["pm"] = data["f0method"] == "pm"
-                    data["harvest"] = data["f0method"] == "harvest"
-                    data["crepe"] = data["f0method"] == "crepe"
-                    data["rmvpe"] = data["f0method"] == "rmvpe"
-                    data["fcpe"] = data["f0method"] == "fcpe"
-            return data
-
-        def launcher(self):
-            data = self.load()
-            self.config.use_jit = False  # data.get("use_jit", self.config.use_jit)
-            sg.theme("LightBlue3")
-            layout = [
-                [
-                    sg.Frame(
-                        title=i18n("加载模型"),
-                        layout=[
-                            [
-                                sg.Input(
-                                    default_text=data.get("pth_path", ""),
-                                    key="pth_path",
-                                ),
-                                sg.FileBrowse(
-                                    i18n("选择.pth文件"),
-                                    initial_folder=os.path.join(
-                                        os.getcwd(), "assets/weights"
-                                    ),
-                                    file_types=((". pth"),),
-                                ),
-                            ],
-                            [
-                                sg.Input(
-                                    default_text=data.get("index_path", ""),
-                                    key="index_path",
-                                ),
-                                sg.FileBrowse(
-                                    i18n("选择.index文件"),
-                                    initial_folder=os.path.join(os.getcwd(), "logs"),
-                                    file_types=((". index"),),
-                                ),
-                            ],
-                        ],
-                    )
-                ],
-                [
-                    sg.Frame(
-                        layout=[
-                            [
-                                sg.Text(i18n("设备类型")),
-                                sg.Combo(
-                                    self.hostapis,
-                                    key="sg_hostapi",
-                                    default_value=data.get("sg_hostapi", ""),
-                                    enable_events=True,
-                                    size=(20, 1),
-                                ),
-                                sg.Checkbox(
-                                    i18n("独占 WASAPI 设备"),
-                                    key="sg_wasapi_exclusive",
-                                    default=data.get("sg_wasapi_exclusive", False),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("输入设备")),
-                                sg.Combo(
-                                    self.input_devices,
-                                    key="sg_input_device",
-                                    default_value=data.get("sg_input_device", ""),
-                                    enable_events=True,
-                                    size=(45, 1),
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("输出设备")),
-                                sg.Combo(
-                                    self.output_devices,
-                                    key="sg_output_device",
-                                    default_value=data.get("sg_output_device", ""),
-                                    enable_events=True,
-                                    size=(45, 1),
-                                ),
-                            ],
-                            [
-                                sg.Button(i18n("重载设备列表"), key="reload_devices"),
-                                sg.Radio(
-                                    i18n("使用模型采样率"),
-                                    "sr_type",
-                                    key="sr_model",
-                                    default=data.get("sr_model", True),
-                                    enable_events=True,
-                                ),
-                                sg.Radio(
-                                    i18n("使用设备采样率"),
-                                    "sr_type",
-                                    key="sr_device",
-                                    default=data.get("sr_device", False),
-                                    enable_events=True,
-                                ),
-                                sg.Text(i18n("采样率:")),
-                                sg.Text("", key="sr_stream"),
-                            ],
-                        ],
-                        title=i18n("音频设备"),
-                    )
-                ],
-                [
-                    sg.Frame(
-                        layout=[
-                            [
-                                sg.Text(i18n("响应阈值")),
-                                sg.Slider(
-                                    range=(-60, 0),
-                                    key="threhold",
-                                    resolution=1,
-                                    orientation="h",
-                                    default_value=data.get("threhold", -60),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("音调设置")),
-                                sg.Slider(
-                                    range=(-16, 16),
-                                    key="pitch",
-                                    resolution=1,
-                                    orientation="h",
-                                    default_value=data.get("pitch", 0),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("性别因子/声线粗细")),
-                                sg.Slider(
-                                    range=(-2, 2),
-                                    key="formant",
-                                    resolution=0.05,
-                                    orientation="h",
-                                    default_value=data.get("formant", 0.0),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("Index Rate")),
-                                sg.Slider(
-                                    range=(0.0, 1.0),
-                                    key="index_rate",
-                                    resolution=0.01,
-                                    orientation="h",
-                                    default_value=data.get("index_rate", 0),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("响度因子")),
-                                sg.Slider(
-                                    range=(0.0, 1.0),
-                                    key="rms_mix_rate",
-                                    resolution=0.01,
-                                    orientation="h",
-                                    default_value=data.get("rms_mix_rate", 0),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("音高算法")),
-                                sg.Radio(
-                                    "pm",
-                                    "f0method",
-                                    key="pm",
-                                    default=data.get("pm", False),
-                                    enable_events=True,
-                                ),
-                                sg.Radio(
-                                    "harvest",
-                                    "f0method",
-                                    key="harvest",
-                                    default=data.get("harvest", False),
-                                    enable_events=True,
-                                ),
-                                sg.Radio(
-                                    "crepe",
-                                    "f0method",
-                                    key="crepe",
-                                    default=data.get("crepe", False),
-                                    enable_events=True,
-                                ),
-                                sg.Radio(
-                                    "rmvpe",
-                                    "f0method",
-                                    key="rmvpe",
-                                    default=data.get("rmvpe", False),
-                                    enable_events=True,
-                                ),
-                                sg.Radio(
-                                    "fcpe",
-                                    "f0method",
-                                    key="fcpe",
-                                    default=data.get("fcpe", True),
-                                    enable_events=True,
-                                ),
-                            ],
-                        ],
-                        title=i18n("常规设置"),
-                    ),
-                    sg.Frame(
-                        layout=[
-                            [
-                                sg.Text(i18n("采样长度")),
-                                sg.Slider(
-                                    range=(0.02, 1.5),
-                                    key="block_time",
-                                    resolution=0.01,
-                                    orientation="h",
-                                    default_value=data.get("block_time", 0.25),
-                                    enable_events=True,
-                                ),
-                            ],
-                            # [
-                            #     sg.Text("设备延迟"),
-                            #     sg.Slider(
-                            #         range=(0, 1),
-                            #         key="device_latency",
-                            #         resolution=0.001,
-                            #         orientation="h",
-                            #         default_value=data.get("device_latency", 0.1),
-                            #         enable_events=True,
-                            #     ),
-                            # ],
-                            [
-                                sg.Text(i18n("harvest进程数")),
-                                sg.Slider(
-                                    range=(1, n_cpu),
-                                    key="n_cpu",
-                                    resolution=1,
-                                    orientation="h",
-                                    default_value=data.get(
-                                        "n_cpu", min(self.gui_config.n_cpu, n_cpu)
-                                    ),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("淡入淡出长度")),
-                                sg.Slider(
-                                    range=(0.01, 0.15),
-                                    key="crossfade_length",
-                                    resolution=0.01,
-                                    orientation="h",
-                                    default_value=data.get("crossfade_length", 0.05),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Text(i18n("额外推理时长")),
-                                sg.Slider(
-                                    range=(0.05, 5.00),
-                                    key="extra_time",
-                                    resolution=0.01,
-                                    orientation="h",
-                                    default_value=data.get("extra_time", 2.5),
-                                    enable_events=True,
-                                ),
-                            ],
-                            [
-                                sg.Checkbox(
-                                    i18n("输入降噪"),
-                                    key="I_noise_reduce",
-                                    enable_events=True,
-                                ),
-                                sg.Checkbox(
-                                    i18n("输出降噪"),
-                                    key="O_noise_reduce",
-                                    enable_events=True,
-                                ),
-                                sg.Checkbox(
-                                    i18n("启用相位声码器"),
-                                    key="use_pv",
-                                    default=data.get("use_pv", False),
-                                    enable_events=True,
-                                ),
-                                # sg.Checkbox(
-                                #     "JIT加速",
-                                #     default=self.config.use_jit,
-                                #     key="use_jit",
-                                #     enable_events=False,
-                                # ),
-                            ],
-                            # [sg.Text("注：首次使用JIT加速时，会出现卡顿，\n      并伴随一些噪音，但这是正常现象！")],
-                        ],
-                        title=i18n("性能设置"),
-                    ),
-                ],
-                [
-                    sg.Button(i18n("开始音频转换"), key="start_vc"),
-                    sg.Button(i18n("停止音频转换"), key="stop_vc"),
-                    sg.Radio(
-                        i18n("输入监听"),
-                        "function",
-                        key="im",
-                        default=False,
-                        enable_events=True,
-                    ),
-                    sg.Radio(
-                        i18n("输出变声"),
-                        "function",
-                        key="vc",
-                        default=True,
-                        enable_events=True,
-                    ),
-                    sg.Text(i18n("算法延迟(ms):")),
-                    sg.Text("0", key="delay_time"),
-                    sg.Text(i18n("推理时间(ms):")),
-                    sg.Text("0", key="infer_time"),
-                ],
-            ]
-            self.window = sg.Window("RVC - GUI", layout=layout, finalize=True)
-            self.event_handler()
-
-        def event_handler(self):
-            global flag_vc
-            while True:
-                event, values = self.window.read()
-                if event == sg.WINDOW_CLOSED:
-                    self.stop_stream()
-                    exit()
-                if event == "reload_devices" or event == "sg_hostapi":
-                    self.gui_config.sg_hostapi = values["sg_hostapi"]
-                    self.update_devices(hostapi_name=values["sg_hostapi"])
-                    if self.gui_config.sg_hostapi not in self.hostapis:
-                        self.gui_config.sg_hostapi = self.hostapis[0]
-                    self.window["sg_hostapi"].Update(values=self.hostapis)
-                    self.window["sg_hostapi"].Update(value=self.gui_config.sg_hostapi)
-                    if (
-                        self.gui_config.sg_input_device not in self.input_devices
-                        and len(self.input_devices) > 0
-                    ):
-                        self.gui_config.sg_input_device = self.input_devices[0]
-                    self.window["sg_input_device"].Update(values=self.input_devices)
-                    self.window["sg_input_device"].Update(
-                        value=self.gui_config.sg_input_device
-                    )
-                    if self.gui_config.sg_output_device not in self.output_devices:
-                        self.gui_config.sg_output_device = self.output_devices[0]
-                    self.window["sg_output_device"].Update(values=self.output_devices)
-                    self.window["sg_output_device"].Update(
-                        value=self.gui_config.sg_output_device
-                    )
-                if event == "start_vc" and not flag_vc:
-                    if self.set_values(values) == True:
-                        printt("cuda_is_available: %s", torch.cuda.is_available())
-                        self.start_vc()
-                        settings = {
-                            "pth_path": values["pth_path"],
-                            "index_path": values["index_path"],
-                            "sg_hostapi": values["sg_hostapi"],
-                            "sg_wasapi_exclusive": values["sg_wasapi_exclusive"],
-                            "sg_input_device": values["sg_input_device"],
-                            "sg_output_device": values["sg_output_device"],
-                            "sr_type": ["sr_model", "sr_device"][
-                                [
-                                    values["sr_model"],
-                                    values["sr_device"],
-                                ].index(True)
-                            ],
-                            "threhold": values["threhold"],
-                            "pitch": values["pitch"],
-                            "rms_mix_rate": values["rms_mix_rate"],
-                            "index_rate": values["index_rate"],
-                            # "device_latency": values["device_latency"],
-                            "block_time": values["block_time"],
-                            "crossfade_length": values["crossfade_length"],
-                            "extra_time": values["extra_time"],
-                            "n_cpu": values["n_cpu"],
-                            # "use_jit": values["use_jit"],
-                            "use_jit": False,
-                            "use_pv": values["use_pv"],
-                            "f0method": ["pm", "harvest", "crepe", "rmvpe", "fcpe"][
-                                [
-                                    values["pm"],
-                                    values["harvest"],
-                                    values["crepe"],
-                                    values["rmvpe"],
-                                    values["fcpe"],
-                                ].index(True)
-                            ],
-                        }
-                        with open("configs/inuse/config.json", "w") as j:
-                            json.dump(settings, j)
-                        if self.stream is not None:
-                            self.delay_time = (
-                                self.stream.latency[-1]
-                                + values["block_time"]
-                                + values["crossfade_length"]
-                                + 0.01
-                            )
-                        if values["I_noise_reduce"]:
-                            self.delay_time += min(values["crossfade_length"], 0.04)
-                        self.window["sr_stream"].update(self.gui_config.samplerate)
-                        self.window["delay_time"].update(
-                            int(np.round(self.delay_time * 1000))
-                        )
-                # Parameter hot update
-                if event == "threhold":
-                    self.gui_config.threhold = values["threhold"]
-                elif event == "pitch":
-                    self.gui_config.pitch = values["pitch"]
-                    if hasattr(self, "rvc"):
-                        self.rvc.change_key(values["pitch"])
-                elif event == "formant":
-                    self.gui_config.formant = values["formant"]
-                    if hasattr(self, "rvc"):
-                        self.rvc.change_formant(values["formant"])
-                elif event == "index_rate":
-                    self.gui_config.index_rate = values["index_rate"]
-                    if hasattr(self, "rvc"):
-                        self.rvc.change_index_rate(values["index_rate"])
-                elif event == "rms_mix_rate":
-                    self.gui_config.rms_mix_rate = values["rms_mix_rate"]
-                elif event in ["pm", "harvest", "crepe", "rmvpe", "fcpe"]:
-                    self.gui_config.f0method = event
-                elif event == "I_noise_reduce":
-                    self.gui_config.I_noise_reduce = values["I_noise_reduce"]
-                    if self.stream is not None:
-                        self.delay_time += (
-                            1 if values["I_noise_reduce"] else -1
-                        ) * min(values["crossfade_length"], 0.04)
-                        self.window["delay_time"].update(
-                            int(np.round(self.delay_time * 1000))
-                        )
-                elif event == "O_noise_reduce":
-                    self.gui_config.O_noise_reduce = values["O_noise_reduce"]
-                elif event == "use_pv":
-                    self.gui_config.use_pv = values["use_pv"]
-                elif event in ["vc", "im"]:
-                    self.function = event
-                elif event == "stop_vc" or event != "start_vc":
-                    # Other parameters do not support hot update
-                    self.stop_stream()
-
-        def set_values(self, values):
-            if len(values["pth_path"].strip()) == 0:
-                sg.popup(i18n("请选择pth文件"))
-                return False
-            if len(values["index_path"].strip()) == 0:
-                sg.popup(i18n("请选择index文件"))
-                return False
-            pattern = re.compile("[^\x00-\x7F]+")
-            if pattern.findall(values["pth_path"]):
-                sg.popup(i18n("pth文件路径不可包含中文"))
-                return False
-            if pattern.findall(values["index_path"]):
-                sg.popup(i18n("index文件路径不可包含中文"))
-                return False
-            self.set_devices(values["sg_input_device"], values["sg_output_device"])
-            self.config.use_jit = False  # values["use_jit"]
-            # self.device_latency = values["device_latency"]
-            self.gui_config.sg_hostapi = values["sg_hostapi"]
-            self.gui_config.sg_wasapi_exclusive = values["sg_wasapi_exclusive"]
-            self.gui_config.sg_input_device = values["sg_input_device"]
-            self.gui_config.sg_output_device = values["sg_output_device"]
-            self.gui_config.pth_path = values["pth_path"]
-            self.gui_config.index_path = values["index_path"]
-            self.gui_config.sr_type = ["sr_model", "sr_device"][
-                [
-                    values["sr_model"],
-                    values["sr_device"],
-                ].index(True)
-            ]
-            self.gui_config.threhold = values["threhold"]
-            self.gui_config.pitch = values["pitch"]
-            self.gui_config.formant = values["formant"]
-            self.gui_config.block_time = values["block_time"]
-            self.gui_config.crossfade_time = values["crossfade_length"]
-            self.gui_config.extra_time = values["extra_time"]
-            self.gui_config.I_noise_reduce = values["I_noise_reduce"]
-            self.gui_config.O_noise_reduce = values["O_noise_reduce"]
-            self.gui_config.use_pv = values["use_pv"]
-            self.gui_config.rms_mix_rate = values["rms_mix_rate"]
-            self.gui_config.index_rate = values["index_rate"]
-            self.gui_config.n_cpu = values["n_cpu"]
-            self.gui_config.f0method = ["pm", "harvest", "crepe", "rmvpe", "fcpe"][
-                [
-                    values["pm"],
-                    values["harvest"],
-                    values["crepe"],
-                    values["rmvpe"],
-                    values["fcpe"],
-                ].index(True)
-            ]
-            return True
-
-        def start_vc(self):
-            torch.cuda.empty_cache()
-            self.rvc = rvc_for_realtime.RVC(
-                self.gui_config.pitch,
-                self.gui_config.formant,
-                self.gui_config.pth_path,
-                self.gui_config.index_path,
-                self.gui_config.index_rate,
-                self.gui_config.n_cpu,
-                inp_q,
-                opt_q,
-                self.config,
-                self.rvc if hasattr(self, "rvc") else None,
-            )
-            self.gui_config.samplerate = (
-                self.rvc.tgt_sr
-                if self.gui_config.sr_type == "sr_model"
-                else self.get_device_samplerate()
-            )
-            self.gui_config.channels = self.get_device_channels()
-            self.zc = self.gui_config.samplerate // 100
-            self.block_frame = (
-                int(
-                    np.round(
-                        self.gui_config.block_time
-                        * self.gui_config.samplerate
-                        / self.zc
-                    )
-                )
-                * self.zc
-            )
-            self.block_frame_16k = 160 * self.block_frame // self.zc
-            self.crossfade_frame = (
-                int(
-                    np.round(
-                        self.gui_config.crossfade_time
-                        * self.gui_config.samplerate
-                        / self.zc
-                    )
-                )
-                * self.zc
-            )
-            self.sola_buffer_frame = min(self.crossfade_frame, 4 * self.zc)
-            self.sola_search_frame = self.zc
-            self.extra_frame = (
-                int(
-                    np.round(
-                        self.gui_config.extra_time
-                        * self.gui_config.samplerate
-                        / self.zc
-                    )
-                )
-                * self.zc
-            )
-            self.input_wav: torch.Tensor = torch.zeros(
-                self.extra_frame
-                + self.crossfade_frame
-                + self.sola_search_frame
-                + self.block_frame,
-                device=self.config.device,
-                dtype=torch.float32,
-            )
-            self.input_wav_denoise: torch.Tensor = self.input_wav.clone()
-            self.input_wav_res: torch.Tensor = torch.zeros(
-                160 * self.input_wav.shape[0] // self.zc,
-                device=self.config.device,
-                dtype=torch.float32,
-            )
-            self.rms_buffer: np.ndarray = np.zeros(4 * self.zc, dtype="float32")
-            self.sola_buffer: torch.Tensor = torch.zeros(
-                self.sola_buffer_frame, device=self.config.device, dtype=torch.float32
-            )
-            self.nr_buffer: torch.Tensor = self.sola_buffer.clone()
-            self.output_buffer: torch.Tensor = self.input_wav.clone()
-            self.skip_head = self.extra_frame // self.zc
-            self.return_length = (
-                self.block_frame + self.sola_buffer_frame + self.sola_search_frame
-            ) // self.zc
-            self.fade_in_window: torch.Tensor = (
-                torch.sin(
-                    0.5
-                    * np.pi
-                    * torch.linspace(
-                        0.0,
-                        1.0,
-                        steps=self.sola_buffer_frame,
-                        device=self.config.device,
-                        dtype=torch.float32,
-                    )
-                )
-                ** 2
-            )
-            self.fade_out_window: torch.Tensor = 1 - self.fade_in_window
-            self.resampler = tat.Resample(
-                orig_freq=self.gui_config.samplerate,
-                new_freq=16000,
-                dtype=torch.float32,
-            ).to(self.config.device)
-            if self.rvc.tgt_sr != self.gui_config.samplerate:
-                self.resampler2 = tat.Resample(
-                    orig_freq=self.rvc.tgt_sr,
-                    new_freq=self.gui_config.samplerate,
-                    dtype=torch.float32,
-                ).to(self.config.device)
-            else:
-                self.resampler2 = None
-            self.tg = TorchGate(
-                sr=self.gui_config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9
-            ).to(self.config.device)
-            self.start_stream()
-
-        def start_stream(self):
-            global flag_vc
-            if not flag_vc:
-                flag_vc = True
-                if (
-                    "WASAPI" in self.gui_config.sg_hostapi
-                    and self.gui_config.sg_wasapi_exclusive
-                ):
-                    extra_settings = sd.WasapiSettings(exclusive=True)
-                else:
-                    extra_settings = None
-                self.stream = sd.Stream(
-                    callback=self.audio_callback,
-                    blocksize=self.block_frame,
-                    samplerate=self.gui_config.samplerate,
-                    channels=self.gui_config.channels,
-                    dtype="float32",
-                    extra_settings=extra_settings,
-                )
-                self.stream.start()
-
-        def stop_stream(self):
-            global flag_vc
-            if flag_vc:
-                flag_vc = False
-                if self.stream is not None:
-                    self.stream.abort()
-                    self.stream.close()
-                    self.stream = None
-
-        def audio_callback(
-            self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
-        ):
-            """
-            音频处理
-            """
-            global flag_vc
-            start_time = time.perf_counter()
-            indata = librosa.to_mono(indata.T)
-            if self.gui_config.threhold > -60:
-                indata = np.append(self.rms_buffer, indata)
-                rms = librosa.feature.rms(
-                    y=indata, frame_length=4 * self.zc, hop_length=self.zc
-                )[:, 2:]
-                self.rms_buffer[:] = indata[-4 * self.zc :]
-                indata = indata[2 * self.zc - self.zc // 2 :]
-                db_threhold = (
-                    librosa.amplitude_to_db(rms, ref=1.0)[0] < self.gui_config.threhold
-                )
-                for i in range(db_threhold.shape[0]):
-                    if db_threhold[i]:
-                        indata[i * self.zc : (i + 1) * self.zc] = 0
-                indata = indata[self.zc // 2 :]
-            self.input_wav[: -self.block_frame] = self.input_wav[
-                self.block_frame :
-            ].clone()
-            self.input_wav[-indata.shape[0] :] = torch.from_numpy(indata).to(
-                self.config.device
-            )
-            self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[
-                self.block_frame_16k :
-            ].clone()
-            # input noise reduction and resampling
-            if self.gui_config.I_noise_reduce:
-                self.input_wav_denoise[: -self.block_frame] = self.input_wav_denoise[
-                    self.block_frame :
-                ].clone()
-                input_wav = self.input_wav[-self.sola_buffer_frame - self.block_frame :]
-                input_wav = self.tg(
-                    input_wav.unsqueeze(0), self.input_wav.unsqueeze(0)
-                ).squeeze(0)
-                input_wav[: self.sola_buffer_frame] *= self.fade_in_window
-                input_wav[: self.sola_buffer_frame] += (
-                    self.nr_buffer * self.fade_out_window
-                )
-                self.input_wav_denoise[-self.block_frame :] = input_wav[
-                    : self.block_frame
-                ]
-                self.nr_buffer[:] = input_wav[self.block_frame :]
-                self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
-                    self.input_wav_denoise[-self.block_frame - 2 * self.zc :]
-                )[160:]
-            else:
-                self.input_wav_res[-160 * (indata.shape[0] // self.zc + 1) :] = (
-                    self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[
-                        160:
-                    ]
-                )
-            # infer
-            if self.function == "vc":
-                infer_wav = self.rvc.infer(
-                    self.input_wav_res,
-                    self.block_frame_16k,
-                    self.skip_head,
-                    self.return_length,
-                    self.gui_config.f0method,
-                )
-                if self.resampler2 is not None:
-                    infer_wav = self.resampler2(infer_wav)
-            elif self.gui_config.I_noise_reduce:
-                infer_wav = self.input_wav_denoise[self.extra_frame :].clone()
-            else:
-                infer_wav = self.input_wav[self.extra_frame :].clone()
-            # output noise reduction
-            if self.gui_config.O_noise_reduce and self.function == "vc":
-                self.output_buffer[: -self.block_frame] = self.output_buffer[
-                    self.block_frame :
-                ].clone()
-                self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :]
-                infer_wav = self.tg(
-                    infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)
-                ).squeeze(0)
-            # volume envelop mixing
-            if self.gui_config.rms_mix_rate < 1 and self.function == "vc":
-                if self.gui_config.I_noise_reduce:
-                    input_wav = self.input_wav_denoise[self.extra_frame :]
-                else:
-                    input_wav = self.input_wav[self.extra_frame :]
-                rms1 = librosa.feature.rms(
-                    y=input_wav[: infer_wav.shape[0]].cpu().numpy(),
-                    frame_length=4 * self.zc,
-                    hop_length=self.zc,
-                )
-                rms1 = torch.from_numpy(rms1).to(self.config.device)
-                rms1 = F.interpolate(
-                    rms1.unsqueeze(0),
-                    size=infer_wav.shape[0] + 1,
-                    mode="linear",
-                    align_corners=True,
-                )[0, 0, :-1]
-                rms2 = librosa.feature.rms(
-                    y=infer_wav[:].cpu().numpy(),
-                    frame_length=4 * self.zc,
-                    hop_length=self.zc,
-                )
-                rms2 = torch.from_numpy(rms2).to(self.config.device)
-                rms2 = F.interpolate(
-                    rms2.unsqueeze(0),
-                    size=infer_wav.shape[0] + 1,
-                    mode="linear",
-                    align_corners=True,
-                )[0, 0, :-1]
-                rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3)
-                infer_wav *= torch.pow(
-                    rms1 / rms2, torch.tensor(1 - self.gui_config.rms_mix_rate)
-                )
-            # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
-            conv_input = infer_wav[
-                None, None, : self.sola_buffer_frame + self.sola_search_frame
-            ]
-            cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
-            cor_den = torch.sqrt(
-                F.conv1d(
-                    conv_input**2,
-                    torch.ones(1, 1, self.sola_buffer_frame, device=self.config.device),
-                )
-                + 1e-8
-            )
-            if sys.platform == "darwin":
-                _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0])
-                sola_offset = sola_offset.item()
-            else:
-                sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
-            printt("sola_offset = %d", int(sola_offset))
-            infer_wav = infer_wav[sola_offset:]
-            if "privateuseone" in str(self.config.device) or not self.gui_config.use_pv:
-                infer_wav[: self.sola_buffer_frame] *= self.fade_in_window
-                infer_wav[: self.sola_buffer_frame] += (
-                    self.sola_buffer * self.fade_out_window
-                )
-            else:
-                infer_wav[: self.sola_buffer_frame] = phase_vocoder(
-                    self.sola_buffer,
-                    infer_wav[: self.sola_buffer_frame],
-                    self.fade_out_window,
-                    self.fade_in_window,
-                )
-            self.sola_buffer[:] = infer_wav[
-                self.block_frame : self.block_frame + self.sola_buffer_frame
-            ]
-            outdata[:] = (
-                infer_wav[: self.block_frame]
-                .repeat(self.gui_config.channels, 1)
-                .t()
-                .cpu()
-                .numpy()
-            )
-            total_time = time.perf_counter() - start_time
-            if flag_vc:
-                self.window["infer_time"].update(int(total_time * 1000))
-            printt("Infer time: %.2f", total_time)
-
-        def update_devices(self, hostapi_name=None):
-            """获取设备列表"""
-            global flag_vc
-            flag_vc = False
-            sd._terminate()
-            sd._initialize()
-            devices = sd.query_devices()
-            hostapis = sd.query_hostapis()
-            for hostapi in hostapis:
-                for device_idx in hostapi["devices"]:
-                    devices[device_idx]["hostapi_name"] = hostapi["name"]
-            self.hostapis = [hostapi["name"] for hostapi in hostapis]
-            if hostapi_name not in self.hostapis:
-                hostapi_name = self.hostapis[0]
-            self.input_devices = [
-                d["name"]
-                for d in devices
-                if d["max_input_channels"] > 0 and d["hostapi_name"] == hostapi_name
-            ]
-            self.output_devices = [
-                d["name"]
-                for d in devices
-                if d["max_output_channels"] > 0 and d["hostapi_name"] == hostapi_name
-            ]
-            self.input_devices_indices = [
-                d["index"] if "index" in d else d["name"]
-                for d in devices
-                if d["max_input_channels"] > 0 and d["hostapi_name"] == hostapi_name
-            ]
-            self.output_devices_indices = [
-                d["index"] if "index" in d else d["name"]
-                for d in devices
-                if d["max_output_channels"] > 0 and d["hostapi_name"] == hostapi_name
-            ]
-
-        def set_devices(self, input_device, output_device):
-            """设置输出设备"""
-            sd.default.device[0] = self.input_devices_indices[
-                self.input_devices.index(input_device)
-            ]
-            sd.default.device[1] = self.output_devices_indices[
-                self.output_devices.index(output_device)
-            ]
-            printt("Input device: %s:%s", str(sd.default.device[0]), input_device)
-            printt("Output device: %s:%s", str(sd.default.device[1]), output_device)
-
-        def get_device_samplerate(self):
-            return int(
-                sd.query_devices(device=sd.default.device[0])["default_samplerate"]
-            )
-
-        def get_device_channels(self):
-            max_input_channels = sd.query_devices(device=sd.default.device[0])[
-                "max_input_channels"
-            ]
-            max_output_channels = sd.query_devices(device=sd.default.device[1])[
-                "max_output_channels"
-            ]
-            return min(max_input_channels, max_output_channels, 2)
-
-    gui = GUI()
diff --git a/poetry.lock b/poetry.lock
index 612c5d4..5921399 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -174,13 +174,13 @@ doc = ["docutils", "jinja2", "myst-parser", "numpydoc", "pillow (>=9,<10)", "pyd
 
 [[package]]
 name = "antlr4-python3-runtime"
-version = "4.8"
-description = "ANTLR 4.8 runtime for Python 3.7"
+version = "4.9.3"
+description = "ANTLR 4.9.3 runtime for Python 3.7"
 optional = false
 python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "antlr4-python3-runtime-4.8.tar.gz", hash = "sha256:15793f5d0512a372b4e7d2284058ad32ce7dd27126b105fb0b2245130445db33"},
+    {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"},
 ]
 
 [[package]]
@@ -204,23 +204,6 @@ doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphin
 test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""]
 trio = ["trio (>=0.23)"]
 
-[[package]]
-name = "aria2"
-version = "0.0.1b0"
-description = "python wheel for aria2 static build."
-optional = false
-python-versions = ">=3.8"
-groups = ["main"]
-files = [
-    {file = "aria2-0.0.1b0-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:349f9fe693d900453d2c1a885cab761a0fce6b92ac7e390df5da32edbb5018e5"},
-    {file = "aria2-0.0.1b0-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:80e6626d297e8d391f483e173abc193a7232a74b26c108c89250e697ced02596"},
-    {file = "aria2-0.0.1b0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:8bbbeaa1bc1a0b30ccdd7909e786821fb345071ba0fe14157a0f715a761e1691"},
-    {file = "aria2-0.0.1b0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:0f87a26658c44668d4aadaf50c0342c63b04814032f51bf533bdeacc024ed1e2"},
-    {file = "aria2-0.0.1b0-py3-none-win32.whl", hash = "sha256:4e73cfb52fd61aedfb7e8e23942b50eebb3081c452ece8acb682622dfcdb709d"},
-    {file = "aria2-0.0.1b0-py3-none-win_amd64.whl", hash = "sha256:00aa00c9f8b709d37849a84760709d31b6c7752007f645f8bea175325c0b14cb"},
-    {file = "aria2-0.0.1b0.tar.gz", hash = "sha256:332b31b5caafb381c43112de01f01cade19de5c68b515c37d971bb147edf56f8"},
-]
-
 [[package]]
 name = "attrs"
 version = "24.2.0"
@@ -866,34 +849,39 @@ files = [
 
 [[package]]
 name = "fairseq"
-version = "0.12.2"
+version = "0.12.3"
 description = "Facebook AI Research Sequence-to-Sequence Toolkit"
 optional = false
 python-versions = "*"
 groups = ["main"]
-files = [
-    {file = "fairseq-0.12.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fe65b07c5121b7cda0c7a17166994a6b0059259ce37881b6daa117b8c209b662"},
-    {file = "fairseq-0.12.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0543905012e39f00bd8c3f3781d9f49e76ab309801eb2eb7de250f5984df0de3"},
-    {file = "fairseq-0.12.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4877d65346797fc580a3a7e6e2364d2331a0026ef099c22eb8311441e49c2c6"},
-    {file = "fairseq-0.12.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:26454f334ca705c67f898846dff34e14c148fcdaf53b4f52d64209773b509347"},
-    {file = "fairseq-0.12.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3b8c8b6dc368d2fd23a06ff613a2af05959eee275fe90846d7cffef4a43c522a"},
-    {file = "fairseq-0.12.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:08fa308c760f995cdc13d9c385e2b9d923a78b48275d8b4d78f3a854c71a8f29"},
-    {file = "fairseq-0.12.2.tar.gz", hash = "sha256:34f1b18426bf3844714534162f065ab733e049597476daa35fffb4d06a92b524"},
-]
+files = []
+develop = false
 
 [package.dependencies]
 bitarray = "*"
 cffi = "*"
 cython = "*"
-hydra-core = ">=1.0.7,<1.1"
-numpy = {version = "*", markers = "python_version >= \"3.7\""}
-omegaconf = "<2.1"
+hydra-core = ">=1.3.2"
+numpy = ">=1.21.3"
+omegaconf = "*"
+packaging = "*"
 regex = "*"
 sacrebleu = ">=1.4.12"
-torch = "*"
+scikit-learn = "*"
+torch = ">=1.13"
 torchaudio = ">=0.8.0"
 tqdm = "*"
 
+[package.extras]
+dev = ["black (==22.3.0)", "flake8", "pytest"]
+docs = ["sphinx", "sphinx-argparse"]
+
+[package.source]
+type = "git"
+url = "https://github.com/One-sixth/fairseq.git"
+reference = "HEAD"
+resolved_reference = "44800430a728c2216fd1cf1e8daa672f50dfacba"
+
 [[package]]
 name = "faiss-cpu"
 version = "1.7.3"
@@ -1445,19 +1433,20 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve
 
 [[package]]
 name = "hydra-core"
-version = "1.0.7"
+version = "1.3.2"
 description = "A framework for elegantly configuring complex applications"
 optional = false
 python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "hydra-core-1.0.7.tar.gz", hash = "sha256:58cc3f7531995b6d8de162ca21f936e17bdaebd4d1e8614d63c32e17c2e41e45"},
-    {file = "hydra_core-1.0.7-py3-none-any.whl", hash = "sha256:e800c6deb8309395508094851fa93bc13408f2285261eb97e626d37193b58a9f"},
+    {file = "hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"},
+    {file = "hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"},
 ]
 
 [package.dependencies]
-antlr4-python3-runtime = "4.8"
-omegaconf = ">=2.0.5,<2.1"
+antlr4-python3-runtime = "==4.9.*"
+omegaconf = ">=2.2,<2.4"
+packaging = "*"
 
 [[package]]
 name = "idna"
@@ -1665,34 +1654,56 @@ files = [
 ]
 
 [[package]]
-name = "librosa"
-version = "0.9.1"
-description = "Python module for audio and music processing"
+name = "lazy-loader"
+version = "0.4"
+description = "Makes it easy to load subpackages and functions on demand."
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "librosa-0.9.1-py3-none-any.whl", hash = "sha256:c2bb61a8008367cca89a3f1dad352d8e55fe5ca5f7414fb5d5258eb52765db33"},
-    {file = "librosa-0.9.1.tar.gz", hash = "sha256:7ed5d6e3f4546e5e3c2840691f9ddc56878f914a35a50060df5fca2b26d4b614"},
+    {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
+    {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
 ]
 
 [package.dependencies]
-audioread = ">=2.1.5"
-decorator = ">=4.0.10"
+packaging = "*"
+
+[package.extras]
+dev = ["changelist (==0.5)"]
+lint = ["pre-commit (==3.7.0)"]
+test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
+
+[[package]]
+name = "librosa"
+version = "0.10.2"
+description = "Python module for audio and music processing"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "librosa-0.10.2-py3-none-any.whl", hash = "sha256:469470592c51533e22d3caa1356c735de9bb0a2e9d29345ab4a1ed52e85f7dc8"},
+    {file = "librosa-0.10.2.tar.gz", hash = "sha256:444693b66b675f7e2f209cfc4f1226f54a3461f8568816e5a1add068101c3888"},
+]
+
+[package.dependencies]
+audioread = ">=2.1.9"
+decorator = ">=4.3.0"
 joblib = ">=0.14"
-numba = ">=0.45.1"
-numpy = ">=1.17.0"
-packaging = ">=20.0"
-pooch = ">=1.0"
-resampy = ">=0.2.2"
-scikit-learn = ">=0.19.1"
+lazy-loader = ">=0.1"
+msgpack = ">=1.0"
+numba = ">=0.51.0"
+numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2"
+pooch = ">=1.1"
+scikit-learn = ">=0.20.0"
 scipy = ">=1.2.0"
-soundfile = ">=0.10.2"
+soundfile = ">=0.12.1"
+soxr = ">=0.3.2"
+typing-extensions = ">=4.1.1"
 
 [package.extras]
 display = ["matplotlib (>=3.3.0)"]
-docs = ["ipython (>=7.0)", "matplotlib (>=3.3.0)", "mir-eval (>=0.5)", "numba (<0.50)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (==0.5.*)", "sphinxcontrib-svg2pdfconverter"]
-tests = ["contextlib2", "matplotlib (>=3.3.0)", "pytest", "pytest-cov", "pytest-mpl", "samplerate", "soxr"]
+docs = ["ipython (>=7.0)", "matplotlib (>=3.3.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"]
+tests = ["matplotlib (>=3.3.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"]
 
 [[package]]
 name = "linkify-it-py"
@@ -2147,6 +2158,80 @@ docs = ["sphinx"]
 gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""]
 tests = ["pytest (>=4.6)"]
 
+[[package]]
+name = "msgpack"
+version = "1.1.0"
+description = "MessagePack serializer"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "msgpack-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ad442d527a7e358a469faf43fda45aaf4ac3249c8310a82f0ccff9164e5dccd"},
+    {file = "msgpack-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:74bed8f63f8f14d75eec75cf3d04ad581da6b914001b474a5d3cd3372c8cc27d"},
+    {file = "msgpack-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:914571a2a5b4e7606997e169f64ce53a8b1e06f2cf2c3a7273aa106236d43dd5"},
+    {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c921af52214dcbb75e6bdf6a661b23c3e6417f00c603dd2070bccb5c3ef499f5"},
+    {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8ce0b22b890be5d252de90d0e0d119f363012027cf256185fc3d474c44b1b9e"},
+    {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:73322a6cc57fcee3c0c57c4463d828e9428275fb85a27aa2aa1a92fdc42afd7b"},
+    {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e1f3c3d21f7cf67bcf2da8e494d30a75e4cf60041d98b3f79875afb5b96f3a3f"},
+    {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:64fc9068d701233effd61b19efb1485587560b66fe57b3e50d29c5d78e7fef68"},
+    {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:42f754515e0f683f9c79210a5d1cad631ec3d06cea5172214d2176a42e67e19b"},
+    {file = "msgpack-1.1.0-cp310-cp310-win32.whl", hash = "sha256:3df7e6b05571b3814361e8464f9304c42d2196808e0119f55d0d3e62cd5ea044"},
+    {file = "msgpack-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:685ec345eefc757a7c8af44a3032734a739f8c45d1b0ac45efc5d8977aa4720f"},
+    {file = "msgpack-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3d364a55082fb2a7416f6c63ae383fbd903adb5a6cf78c5b96cc6316dc1cedc7"},
+    {file = "msgpack-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:79ec007767b9b56860e0372085f8504db5d06bd6a327a335449508bbee9648fa"},
+    {file = "msgpack-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6ad622bf7756d5a497d5b6836e7fc3752e2dd6f4c648e24b1803f6048596f701"},
+    {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e59bca908d9ca0de3dc8684f21ebf9a690fe47b6be93236eb40b99af28b6ea6"},
+    {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1da8f11a3dd397f0a32c76165cf0c4eb95b31013a94f6ecc0b280c05c91b59"},
+    {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:452aff037287acb1d70a804ffd022b21fa2bb7c46bee884dbc864cc9024128a0"},
+    {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8da4bf6d54ceed70e8861f833f83ce0814a2b72102e890cbdfe4b34764cdd66e"},
+    {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:41c991beebf175faf352fb940bf2af9ad1fb77fd25f38d9142053914947cdbf6"},
+    {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a52a1f3a5af7ba1c9ace055b659189f6c669cf3657095b50f9602af3a3ba0fe5"},
+    {file = "msgpack-1.1.0-cp311-cp311-win32.whl", hash = "sha256:58638690ebd0a06427c5fe1a227bb6b8b9fdc2bd07701bec13c2335c82131a88"},
+    {file = "msgpack-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd2906780f25c8ed5d7b323379f6138524ba793428db5d0e9d226d3fa6aa1788"},
+    {file = "msgpack-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d46cf9e3705ea9485687aa4001a76e44748b609d260af21c4ceea7f2212a501d"},
+    {file = "msgpack-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5dbad74103df937e1325cc4bfeaf57713be0b4f15e1c2da43ccdd836393e2ea2"},
+    {file = "msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58dfc47f8b102da61e8949708b3eafc3504509a5728f8b4ddef84bd9e16ad420"},
+    {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676e5be1b472909b2ee6356ff425ebedf5142427842aa06b4dfd5117d1ca8a2"},
+    {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17fb65dd0bec285907f68b15734a993ad3fc94332b5bb21b0435846228de1f39"},
+    {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a51abd48c6d8ac89e0cfd4fe177c61481aca2d5e7ba42044fd218cfd8ea9899f"},
+    {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2137773500afa5494a61b1208619e3871f75f27b03bcfca7b3a7023284140247"},
+    {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:398b713459fea610861c8a7b62a6fec1882759f308ae0795b5413ff6a160cf3c"},
+    {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b"},
+    {file = "msgpack-1.1.0-cp312-cp312-win32.whl", hash = "sha256:ad33e8400e4ec17ba782f7b9cf868977d867ed784a1f5f2ab46e7ba53b6e1e1b"},
+    {file = "msgpack-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:115a7af8ee9e8cddc10f87636767857e7e3717b7a2e97379dc2054712693e90f"},
+    {file = "msgpack-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf"},
+    {file = "msgpack-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0f92a83b84e7c0749e3f12821949d79485971f087604178026085f60ce109330"},
+    {file = "msgpack-1.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a1964df7b81285d00a84da4e70cb1383f2e665e0f1f2a7027e683956d04b734"},
+    {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59caf6a4ed0d164055ccff8fe31eddc0ebc07cf7326a2aaa0dbf7a4001cd823e"},
+    {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0907e1a7119b337971a689153665764adc34e89175f9a34793307d9def08e6ca"},
+    {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65553c9b6da8166e819a6aa90ad15288599b340f91d18f60b2061f402b9a4915"},
+    {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7a946a8992941fea80ed4beae6bff74ffd7ee129a90b4dd5cf9c476a30e9708d"},
+    {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4b51405e36e075193bc051315dbf29168d6141ae2500ba8cd80a522964e31434"},
+    {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4c01941fd2ff87c2a934ee6055bda4ed353a7846b8d4f341c428109e9fcde8c"},
+    {file = "msgpack-1.1.0-cp313-cp313-win32.whl", hash = "sha256:7c9a35ce2c2573bada929e0b7b3576de647b0defbd25f5139dcdaba0ae35a4cc"},
+    {file = "msgpack-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:bce7d9e614a04d0883af0b3d4d501171fbfca038f12c77fa838d9f198147a23f"},
+    {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c40ffa9a15d74e05ba1fe2681ea33b9caffd886675412612d93ab17b58ea2fec"},
+    {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1ba6136e650898082d9d5a5217d5906d1e138024f836ff48691784bbe1adf96"},
+    {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0856a2b7e8dcb874be44fea031d22e5b3a19121be92a1e098f46068a11b0870"},
+    {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:471e27a5787a2e3f974ba023f9e265a8c7cfd373632247deb225617e3100a3c7"},
+    {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:646afc8102935a388ffc3914b336d22d1c2d6209c773f3eb5dd4d6d3b6f8c1cb"},
+    {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:13599f8829cfbe0158f6456374e9eea9f44eee08076291771d8ae93eda56607f"},
+    {file = "msgpack-1.1.0-cp38-cp38-win32.whl", hash = "sha256:8a84efb768fb968381e525eeeb3d92857e4985aacc39f3c47ffd00eb4509315b"},
+    {file = "msgpack-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:879a7b7b0ad82481c52d3c7eb99bf6f0645dbdec5134a4bddbd16f3506947feb"},
+    {file = "msgpack-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:53258eeb7a80fc46f62fd59c876957a2d0e15e6449a9e71842b6d24419d88ca1"},
+    {file = "msgpack-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e7b853bbc44fb03fbdba34feb4bd414322180135e2cb5164f20ce1c9795ee48"},
+    {file = "msgpack-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3e9b4936df53b970513eac1758f3882c88658a220b58dcc1e39606dccaaf01c"},
+    {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46c34e99110762a76e3911fc923222472c9d681f1094096ac4102c18319e6468"},
+    {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a706d1e74dd3dea05cb54580d9bd8b2880e9264856ce5068027eed09680aa74"},
+    {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:534480ee5690ab3cbed89d4c8971a5c631b69a8c0883ecfea96c19118510c846"},
+    {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8cf9e8c3a2153934a23ac160cc4cba0ec035f6867c8013cc6077a79823370346"},
+    {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3180065ec2abbe13a4ad37688b61b99d7f9e012a535b930e0e683ad6bc30155b"},
+    {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c5a91481a3cc573ac8c0d9aace09345d989dc4a0202b7fcb312c88c26d4e71a8"},
+    {file = "msgpack-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f80bc7d47f76089633763f952e67f8214cb7b3ee6bfa489b3cb6a84cfac114cd"},
+    {file = "msgpack-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:4d1b7ff2d6146e16e8bd665ac726a89c74163ef8cd39fa8c1087d4e52d3a2325"},
+    {file = "msgpack-1.1.0.tar.gz", hash = "sha256:dd432ccc2c72b914e4cb77afce64aab761c1137cc698be3984eee260bcb2896e"},
+]
+
 [[package]]
 name = "multidict"
 version = "6.0.5"
@@ -2363,7 +2448,7 @@ description = "CUBLAS native runtime libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
     {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"},
@@ -2376,7 +2461,7 @@ description = "CUDA profiling tools runtime libs."
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
     {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"},
@@ -2389,7 +2474,7 @@ description = "NVRTC native runtime libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
     {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"},
@@ -2402,27 +2487,12 @@ description = "CUDA Runtime native Libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
     {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"},
 ]
 
-[[package]]
-name = "nvidia-cudnn-cu12"
-version = "8.9.2.26"
-description = "cuDNN runtime libraries"
-optional = false
-python-versions = ">=3"
-groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"win32\""
-files = [
-    {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"},
-]
-
-[package.dependencies]
-nvidia-cublas-cu12 = "*"
-
 [[package]]
 name = "nvidia-cudnn-cu12"
 version = "9.1.0.70"
@@ -2430,7 +2500,7 @@ description = "cuDNN runtime libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"win32\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"},
     {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"},
@@ -2446,7 +2516,7 @@ description = "CUFFT native runtime libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
     {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"},
@@ -2459,7 +2529,7 @@ description = "CURAND native runtime libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
     {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"},
@@ -2472,7 +2542,7 @@ description = "CUDA solver native runtime libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
     {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"},
@@ -2490,7 +2560,7 @@ description = "CUSPARSE native runtime libraries"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
     {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"},
@@ -2499,18 +2569,6 @@ files = [
 [package.dependencies]
 nvidia-nvjitlink-cu12 = "*"
 
-[[package]]
-name = "nvidia-nccl-cu12"
-version = "2.18.1"
-description = "NVIDIA Collective Communication Library (NCCL) Runtime"
-optional = false
-python-versions = ">=3"
-groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"win32\""
-files = [
-    {file = "nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:1a6c4acefcbebfa6de320f412bf7866de856e786e0462326ba1bac40de0b5e71"},
-]
-
 [[package]]
 name = "nvidia-nccl-cu12"
 version = "2.20.5"
@@ -2518,7 +2576,7 @@ description = "NVIDIA Collective Communication Library (NCCL) Runtime"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"win32\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"},
     {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"},
@@ -2531,7 +2589,7 @@ description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_aarch64.whl", hash = "sha256:84fb38465a5bc7c70cbc320cfd0963eb302ee25a5e939e9f512bbba55b6072fb"},
     {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl", hash = "sha256:562ab97ea2c23164823b2a89cb328d01d45cb99634b8c65fe7cd60d14562bd79"},
@@ -2545,7 +2603,7 @@ description = "NVIDIA Tools Extension"
 optional = false
 python-versions = ">=3"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
     {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"},
@@ -2553,19 +2611,19 @@ files = [
 
 [[package]]
 name = "omegaconf"
-version = "2.0.6"
+version = "2.3.0"
 description = "A flexible configuration library"
 optional = false
 python-versions = ">=3.6"
 groups = ["main"]
 files = [
-    {file = "omegaconf-2.0.6-py3-none-any.whl", hash = "sha256:9e349fd76819b95b47aa628edea1ff83fed5b25108608abdd6c7fdca188e302a"},
-    {file = "omegaconf-2.0.6.tar.gz", hash = "sha256:92ca535a788d21651bf4c2eaf5c1ca4c7a8003b2dab4a87cbb09109784268806"},
+    {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"},
+    {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"},
 ]
 
 [package.dependencies]
-PyYAML = ">=5.1"
-typing-extensions = "*"
+antlr4-python3-runtime = "==4.9.*"
+PyYAML = ">=5.1.0"
 
 [[package]]
 name = "onnxruntime"
@@ -3834,6 +3892,44 @@ cffi = ">=1.0"
 [package.extras]
 numpy = ["numpy"]
 
+[[package]]
+name = "soxr"
+version = "0.5.0.post1"
+description = "High quality, one-dimensional sample-rate conversion library"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"},
+    {file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"},
+    {file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"},
+    {file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"},
+    {file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"},
+    {file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"},
+]
+
+[package.dependencies]
+numpy = "*"
+
+[package.extras]
+docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"]
+test = ["pytest"]
+
 [[package]]
 name = "starlette"
 version = "0.22.0"
@@ -3951,101 +4047,32 @@ files = [
 
 [[package]]
 name = "torch"
-version = "2.1.1+cu118"
+version = "2.4.1"
 description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
 optional = false
 python-versions = ">=3.8.0"
 groups = ["main"]
-markers = "sys_platform == \"linux\""
 files = [
-    {file = "torch-2.1.1+cu118-cp311-cp311-linux_x86_64.whl", hash = "sha256:f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c"},
-]
-
-[package.dependencies]
-filelock = "*"
-fsspec = "*"
-jinja2 = "*"
-networkx = "*"
-sympy = "*"
-triton = "2.1.0"
-typing-extensions = "*"
-
-[package.extras]
-dynamo = ["jinja2"]
-opt-einsum = ["opt-einsum (>=3.3)"]
-
-[package.source]
-type = "url"
-url = "https://download.pytorch.org/whl/cu118/torch-2.1.1%2Bcu118-cp311-cp311-linux_x86_64.whl#sha256=f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c"
-
-[[package]]
-name = "torch"
-version = "2.1.1+cu118"
-description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
-optional = false
-python-versions = ">=3.8.0"
-groups = ["main"]
-markers = "sys_platform == \"win32\""
-files = [
-    {file = "torch-2.1.1+cu118-cp311-cp311-win_amd64.whl", hash = "sha256:d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a"},
-]
-
-[package.dependencies]
-filelock = "*"
-fsspec = "*"
-jinja2 = "*"
-networkx = "*"
-nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nccl-cu12 = {version = "2.18.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-sympy = "*"
-triton = {version = "2.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-typing-extensions = "*"
-
-[package.extras]
-opt-einsum = ["opt-einsum (>=3.3)"]
-
-[package.source]
-type = "url"
-url = "https://download.pytorch.org/whl/cu118/torch-2.1.1%2Bcu118-cp311-cp311-win_amd64.whl#sha256=d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a"
-
-[[package]]
-name = "torch"
-version = "2.4.0"
-description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
-optional = false
-python-versions = ">=3.8.0"
-groups = ["main"]
-markers = "sys_platform != \"win32\" and sys_platform != \"linux\""
-files = [
-    {file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"},
-    {file = "torch-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c4ca297b7bd58b506bfd6e78ffd14eb97c0e7797dcd7965df62f50bb575d8954"},
-    {file = "torch-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2497cbc7b3c951d69b276ca51fe01c2865db67040ac67f5fc20b03e41d16ea4a"},
-    {file = "torch-2.4.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:685418ab93730efbee71528821ff54005596970dd497bf03c89204fb7e3f71de"},
-    {file = "torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e743adadd8c8152bb8373543964551a7cb7cc20ba898dc8f9c0cdbe47c283de0"},
-    {file = "torch-2.4.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7334325c0292cbd5c2eac085f449bf57d3690932eac37027e193ba775703c9e6"},
-    {file = "torch-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:97730014da4c57ffacb3c09298c6ce05400606e890bd7a05008d13dd086e46b1"},
-    {file = "torch-2.4.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f169b4ea6dc93b3a33319611fcc47dc1406e4dd539844dcbd2dec4c1b96e166d"},
-    {file = "torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:997084a0f9784d2a89095a6dc67c7925e21bf25dea0b3d069b41195016ccfcbb"},
-    {file = "torch-2.4.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:bc3988e8b36d1e8b998d143255d9408d8c75da4ab6dd0dcfd23b623dfb0f0f57"},
-    {file = "torch-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:3374128bbf7e62cdaed6c237bfd39809fbcfaa576bee91e904706840c3f2195c"},
-    {file = "torch-2.4.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:91aaf00bfe1ffa44dc5b52809d9a95129fca10212eca3ac26420eb11727c6288"},
-    {file = "torch-2.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cc30457ea5489c62747d3306438af00c606b509d78822a88f804202ba63111ed"},
-    {file = "torch-2.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a046491aaf96d1215e65e1fa85911ef2ded6d49ea34c8df4d0638879f2402eef"},
-    {file = "torch-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:688eec9240f3ce775f22e1e1a5ab9894f3d5fe60f3f586deb7dbd23a46a83916"},
-    {file = "torch-2.4.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:3af4de2a618fb065e78404c4ba27a818a7b7957eaeff28c6c66ce7fb504b68b8"},
-    {file = "torch-2.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:618808d3f610d5f180e47a697d4ec90b810953bb1e020f424b2ac7fb0884b545"},
-    {file = "torch-2.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:ed765d232d23566052ba83632ec73a4fccde00b4c94ad45d63b471b09d63b7a7"},
-    {file = "torch-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2feb98ac470109472fb10dfef38622a7ee08482a16c357863ebc7bc7db7c8f7"},
-    {file = "torch-2.4.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8940fc8b97a4c61fdb5d46a368f21f4a3a562a17879e932eb51a5ec62310cb31"},
+    {file = "torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971"},
+    {file = "torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3"},
+    {file = "torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada"},
+    {file = "torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd"},
+    {file = "torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113"},
+    {file = "torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8"},
+    {file = "torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c"},
+    {file = "torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea"},
+    {file = "torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042"},
+    {file = "torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d"},
+    {file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"},
+    {file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"},
+    {file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"},
+    {file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"},
+    {file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"},
+    {file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"},
+    {file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"},
+    {file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"},
+    {file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"},
+    {file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"},
 ]
 
 [package.dependencies]
@@ -4064,6 +4091,7 @@ nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"
 nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+setuptools = "*"
 sympy = "*"
 triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""}
 typing-extensions = ">=4.8.0"
@@ -4073,94 +4101,80 @@ opt-einsum = ["opt-einsum (>=3.3)"]
 optree = ["optree (>=0.11.0)"]
 
 [[package]]
-name = "torchaudio"
-version = "2.1.1+cu118"
-description = "An audio package for PyTorch"
+name = "torch-directml"
+version = "0.2.5.dev240914"
+description = "A DirectML backend for hardware acceleration in PyTorch."
 optional = false
-python-versions = "*"
+python-versions = ">=3.7"
 groups = ["main"]
-markers = "sys_platform == \"linux\""
 files = [
-    {file = "torchaudio-2.1.1+cu118-cp311-cp311-linux_x86_64.whl", hash = "sha256:2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4"},
+    {file = "torch_directml-0.2.5.dev240914-cp310-cp310-manylinux2010_x86_64.whl", hash = "sha256:30e29872f4d6059dd784897fa2df0cddb80f3874ed4f0860758790286e95a823"},
+    {file = "torch_directml-0.2.5.dev240914-cp310-cp310-win_amd64.whl", hash = "sha256:9fe494ffd2c8ab9079f13404d052fc261cd8efb639a776c9075e58d9c64d6cb2"},
+    {file = "torch_directml-0.2.5.dev240914-cp311-cp311-manylinux2010_x86_64.whl", hash = "sha256:6afb675585d30018c813e5ba203a3437073748919af8ab3e910092a0e0ec531f"},
+    {file = "torch_directml-0.2.5.dev240914-cp311-cp311-win_amd64.whl", hash = "sha256:3315b6c7e898685827607f1d8170dacc386ac248502aba9bd36cf82e78d930bb"},
+    {file = "torch_directml-0.2.5.dev240914-cp312-cp312-manylinux2010_x86_64.whl", hash = "sha256:26915aff5008a8567ea7641b74cf8cb53c1767d0c7163fc06e0a587e7c1c9dce"},
+    {file = "torch_directml-0.2.5.dev240914-cp312-cp312-win_amd64.whl", hash = "sha256:ea19d11e33e9450b290311c06f7eb10924dd25c555e504d367b7b437d3eb24d0"},
+    {file = "torch_directml-0.2.5.dev240914-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:9b263c7d88ea225ce35b116441e1585fc753e9d69f64a8b6d83aabdc6c511517"},
+    {file = "torch_directml-0.2.5.dev240914-cp38-cp38-win_amd64.whl", hash = "sha256:6e18fb706d15cc6d0d3de49f46a7edc07ae669531c851d7d8f98855f4974f9e6"},
+    {file = "torch_directml-0.2.5.dev240914-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:9c1988e95694520b31f1056bc038b76a62c9ca63f2d4e93f1abea1d293ec49ce"},
+    {file = "torch_directml-0.2.5.dev240914-cp39-cp39-win_amd64.whl", hash = "sha256:488509f0e8deb22f052b56f5cdad3a55878b65a7d99ee4b448fb4ab3cbb8d8ea"},
 ]
 
 [package.dependencies]
-torch = "2.1.1"
-
-[package.source]
-type = "url"
-url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1%2Bcu118-cp311-cp311-linux_x86_64.whl#sha256=2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4"
+torch = "2.4.1"
+torchvision = "0.19.1"
 
 [[package]]
 name = "torchaudio"
-version = "2.1.1+cu118"
+version = "2.4.1"
 description = "An audio package for PyTorch"
 optional = false
 python-versions = "*"
 groups = ["main"]
-markers = "sys_platform == \"win32\""
 files = [
-    {file = "torchaudio-2.1.1+cu118-cp311-cp311-win_amd64.whl", hash = "sha256:79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2"},
+    {file = "torchaudio-2.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:661909751909340b24f637410dfec02a888867816c3db19ed4f4102ae105244a"},
+    {file = "torchaudio-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bfc234cef1d03092ea27440fb79e486722ccb41cff94ebaf9d5a1082436395fe"},
+    {file = "torchaudio-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:54431179d9a9ccf3feeae98aace07d89fae9fd728e2bc8656efbd70e7edcc6f8"},
+    {file = "torchaudio-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:dec97872215c3122b7718ec47ac63e143565c3cced06444d0225e98bf4dd4b5f"},
+    {file = "torchaudio-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60af1531815d22659e5412ea401bed552a16c389938c49664e446e4cfd5ddc06"},
+    {file = "torchaudio-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:95a0968569f7f4455bfd242bfcd489ec47ad37d2ba0f3d9f738cd1128a5f775c"},
+    {file = "torchaudio-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7640aaffb2056e12f2906187b03a22228a0908c87d0295fddf4b0b92334a290b"},
+    {file = "torchaudio-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:3c08b42a0c296c8eeee6c533bcae5cfbc0ceae86a34f24fe6bbbb5faa7a7bea1"},
+    {file = "torchaudio-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:953946cf610ffd57bb3fdd228effa2112fa51c5dfe36a96611effc9074a3d3be"},
+    {file = "torchaudio-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:1796a8961decb522c47daab0fbe27c057d6d143ee22bb6ae0d5eb9b2a038c7b6"},
+    {file = "torchaudio-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:5b62fc7b16ed708b0c07d4393137797e92f63fc3bd5705607d97ba6a9a7cf3f0"},
+    {file = "torchaudio-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:d721b186aae7bd8752c9ad95213f5d650926597bb9060728dfe476986a1ff570"},
+    {file = "torchaudio-2.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4ea0fd00142fe795c75bcc20a303981b56f2327c7f7d321b42a8fef1d78aafa9"},
+    {file = "torchaudio-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:375d8740c8035a50faca7a5afe2fbdb712aa8733715b971b2af61b4003fa1c41"},
+    {file = "torchaudio-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:74d19cf9ca3dad394afcabb7e6f7ed9ab9f59f2540d502826c7ec3e33985251d"},
+    {file = "torchaudio-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:40e9fa8fdc8d328ea4aa90be65fd34c5ef975610dbd707545e3664393a8a2497"},
+    {file = "torchaudio-2.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3adce550850902b9aa6cd2378ccd720ac9ec8cf31e2eba9743ccc84ffcbe76d6"},
+    {file = "torchaudio-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:98d8e03703f96b13a8d172d1ccdc7badb338227fd762985fdcea6b30f6697bdb"},
+    {file = "torchaudio-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:36c7e7bc6b358cbf42b769c80206780fa1497d141a985c6b3e7768de44524e9a"},
+    {file = "torchaudio-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:f46e34ab3866ad8d8ace0673cd11e697c5cde6a3b7a4d8d789207d4d8badbb6e"},
 ]
 
 [package.dependencies]
-torch = "2.1.1+cu118"
-
-[package.source]
-type = "url"
-url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1%2Bcu118-cp311-cp311-win_amd64.whl#sha256=79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2"
-
-[[package]]
-name = "torchaudio"
-version = "2.4.0"
-description = "An audio package for PyTorch"
-optional = false
-python-versions = "*"
-groups = ["main"]
-markers = "sys_platform != \"win32\" and sys_platform != \"linux\""
-files = [
-    {file = "torchaudio-2.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:733e9d859b88dabefeaf008e3ab2b8c7885b29466068b4b79a42766be4619e46"},
-    {file = "torchaudio-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:c48bab82a9ee0c67b9323c2ebbe0890a34c5815d1ff1ace77b1c9df4e6fdbbff"},
-    {file = "torchaudio-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:17cb73d4336771d455cd8dda8b4891307a5346b890a4e6b1d4b73d565258fee1"},
-    {file = "torchaudio-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:af19edc1c3c0ac626f594fc67f087db401016d9216af8d62b6c6ff731efbae43"},
-    {file = "torchaudio-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:acbcf9129ffcfce808254e2cbff103363c505ce06ed4c4231b3f436a10679d4d"},
-    {file = "torchaudio-2.4.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:393ee8c24110ccc8030482c10cd9d5d0b5e528f6a9dd3d60557e1151aa951b13"},
-    {file = "torchaudio-2.4.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:be969c09466db35e0d79b8b09dff66caedbb9569b42c903a2d5e0db2af760e3c"},
-    {file = "torchaudio-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:2993a3288b2b451bf90c7c4d65991b5769e2614d923e295f08a10066ce79d3c0"},
-    {file = "torchaudio-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ae13a95ef6fabcadb0eff36d85f5048d70474a2e9704fa9c86e9903cbcec0d4a"},
-    {file = "torchaudio-2.4.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:4782a49171d94431bb815a55aa72733f5fe38034bdf6adeced28c226e2cc791b"},
-    {file = "torchaudio-2.4.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:534d1907bb252ecd2ba9e1d61cff7220fd66090e63df7b3c109cea77a19d4cb8"},
-    {file = "torchaudio-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:473c149c1c5288f4ce7b609c5ecb7b2528e7958ea701147a20413d65e5a8a59c"},
-    {file = "torchaudio-2.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fc3f8ecd6f0bbfc654d3bc52756a7ca359f1d88b4fa0290e1cdb763a3131b7b9"},
-    {file = "torchaudio-2.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2bcd9700f8ec70804cc9c48d4f6f3fa7372f52421eebb64d02c04bf805ad284d"},
-    {file = "torchaudio-2.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d7fe9e7f2fe8250fde07b20356c44d770d5faa3ca277abdcda3af7d484048fba"},
-    {file = "torchaudio-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:6a10d3c29097a4d81533ab79e351c93d6d91eb1584671d5eee59ba3c259be796"},
-    {file = "torchaudio-2.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1fd670c808e322c101957a07651e29935f86ec389243c0c43a24edd7a1854841"},
-    {file = "torchaudio-2.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1eecb83c123577779a45381de3a38e4add132a80104cff4afd816913f51ca17b"},
-    {file = "torchaudio-2.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c840894de12a6dd3ea57cbb0d0086123aaa48001ba3ad99ef714fe009eae8eb9"},
-    {file = "torchaudio-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:608fd609cdd8323ef4a50c1e984a0be7282a6c630fad22e040e957f8e376950e"},
-]
-
-[package.dependencies]
-torch = "2.4.0"
+torch = "2.4.1"
 
 [[package]]
 name = "torchcrepe"
-version = "0.0.20"
+version = "0.0.23"
 description = "Pytorch implementation of CREPE pitch tracker"
 optional = false
 python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "torchcrepe-0.0.20-py3-none-any.whl", hash = "sha256:a8803df94cd7675c0ee1f8f6ca6ea78075bca0825497632914b76e0dd5b61d31"},
-    {file = "torchcrepe-0.0.20.tar.gz", hash = "sha256:b4eac23d0a85582739fe69eeb3136e2fb6ebb7e5363a6556f7c9fe83b11f8e2c"},
+    {file = "torchcrepe-0.0.23-py3-none-any.whl", hash = "sha256:6e104465b89e763ba7fd0d1b228162783ed4b5a6c5735772baca286c20d7ae2c"},
+    {file = "torchcrepe-0.0.23.tar.gz", hash = "sha256:8f7e75638a5ab3fbb9cfc1704c173adbcb61de73dbdadcf428b3d93b54dfe57f"},
 ]
 
 [package.dependencies]
-librosa = "0.9.1"
+librosa = ">=0.9.1"
 resampy = "*"
 scipy = "*"
 torch = "*"
+torchaudio = "*"
 tqdm = "*"
 
 [[package]]
@@ -4181,6 +4195,45 @@ numpy = "*"
 torch = "*"
 torchaudio = "*"
 
+[[package]]
+name = "torchvision"
+version = "0.19.1"
+description = "image and video datasets and models for torch deep learning"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "torchvision-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:54e8513099e6f586356c70f809d34f391af71ad182fe071cc328a28af2c40608"},
+    {file = "torchvision-0.19.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:20a1f5e02bfdad7714e55fa3fa698347c11d829fa65e11e5a84df07d93350eed"},
+    {file = "torchvision-0.19.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:7b063116164be52fc6deb4762de7f8c90bfa3a65f8d5caf17f8e2d5aadc75a04"},
+    {file = "torchvision-0.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:f40b6acabfa886da1bc3768f47679c61feee6bde90deb979d9f300df8c8a0145"},
+    {file = "torchvision-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40514282b4896d62765b8e26d7091c32e17c35817d00ec4be2362ea3ba3d1787"},
+    {file = "torchvision-0.19.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:5a91be061ae5d6d5b95e833b93e57ca4d3c56c5a57444dd15da2e3e7fba96050"},
+    {file = "torchvision-0.19.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d71a6a6fe3a5281ca3487d4c56ad4aad20ff70f82f1d7c79bcb6e7b0c2af00c8"},
+    {file = "torchvision-0.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:70dea324174f5e9981b68e4b7cd524512c106ba64aedef560a86a0bbf2fbf62c"},
+    {file = "torchvision-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27ece277ff0f6cdc7fed0627279c632dcb2e58187da771eca24b0fbcf3f8590d"},
+    {file = "torchvision-0.19.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:c659ff92a61f188a1a7baef2850f3c0b6c85685447453c03d0e645ba8f1dcc1c"},
+    {file = "torchvision-0.19.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:c07bf43c2a145d792ecd9d0503d6c73577147ece508d45600d8aac77e4cdfcf9"},
+    {file = "torchvision-0.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b4283d283675556bb0eae31d29996f53861b17cbdcdf3509e6bc050414ac9289"},
+    {file = "torchvision-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c4e4f5b24ea6b087b02ed492ab1e21bba3352c4577e2def14248cfc60732338"},
+    {file = "torchvision-0.19.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9281d63ead929bb19143731154cd1d8bf0b5e9873dff8578a40e90a6bec3c6fa"},
+    {file = "torchvision-0.19.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:4d10bc9083c4d5fadd7edd7b729700a7be48dab4f62278df3bc73fa48e48a155"},
+    {file = "torchvision-0.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:ccf085ef1824fb9e16f1901285bf89c298c62dfd93267a39e8ee42c71255242f"},
+    {file = "torchvision-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:731f434d91586769e255b5d70ed1a4457e0a1394a95f4aacf0e1e7e21f80c098"},
+    {file = "torchvision-0.19.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:febe4f14d4afcb47cc861d8be7760ab6a123cd0817f97faf5771488cb6aa90f4"},
+    {file = "torchvision-0.19.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e328309b8670a2e889b2fe76a1c2744a099c11c984da9a822357bd9debd699a5"},
+    {file = "torchvision-0.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:6616f12e00a22e7f3fedbd0fccb0804c05e8fe22871668f10eae65cf3f283614"},
+]
+
+[package.dependencies]
+numpy = "*"
+pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0"
+torch = "2.4.1"
+
+[package.extras]
+gdown = ["gdown (>=4.7.3)"]
+scipy = ["scipy"]
+
 [[package]]
 name = "tornado"
 version = "6.4.1"
@@ -4239,33 +4292,6 @@ files = [
 docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
 test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"]
 
-[[package]]
-name = "triton"
-version = "2.1.0"
-description = "A language and compiler for custom Deep Learning operations"
-optional = false
-python-versions = "*"
-groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"win32\" or sys_platform == \"linux\""
-files = [
-    {file = "triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:66439923a30d5d48399b08a9eae10370f6c261a5ec864a64983bae63152d39d7"},
-    {file = "triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:919b06453f0033ea52c13eaf7833de0e57db3178d23d4e04f9fc71c4f2c32bf8"},
-    {file = "triton-2.1.0-0-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae4bb8a91de790e1866405211c4d618379781188f40d5c4c399766914e84cd94"},
-    {file = "triton-2.1.0-0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39f6fb6bdccb3e98f3152e3fbea724f1aeae7d749412bbb1fa9c441d474eba26"},
-    {file = "triton-2.1.0-0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21544e522c02005a626c8ad63d39bdff2f31d41069592919ef281e964ed26446"},
-    {file = "triton-2.1.0-0-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:143582ca31dd89cd982bd3bf53666bab1c7527d41e185f9e3d8a3051ce1b663b"},
-    {file = "triton-2.1.0-0-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82fc5aeeedf6e36be4e4530cbdcba81a09d65c18e02f52dc298696d45721f3bd"},
-    {file = "triton-2.1.0-0-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:81a96d110a738ff63339fc892ded095b31bd0d205e3aace262af8400d40b6fa8"},
-]
-
-[package.dependencies]
-filelock = "*"
-
-[package.extras]
-build = ["cmake (>=3.18)", "lit"]
-tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)"]
-tutorials = ["matplotlib", "pandas", "tabulate"]
-
 [[package]]
 name = "triton"
 version = "3.0.0"
@@ -4273,7 +4299,7 @@ description = "A language and compiler for custom Deep Learning operations"
 optional = false
 python-versions = "*"
 groups = ["main"]
-markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"win32\" and sys_platform != \"linux\""
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"},
     {file = "triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ce8520437c602fb633f1324cc3871c47bee3b67acf9756c1a66309b60e3216c"},
@@ -4573,4 +4599,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.11,<3.12"
-content-hash = "11dff0dae7badee83fcbe401167de506736348a975eebf3f9e3015f3b3c78584"
+content-hash = "5803c6efb57eca486961794f233953c199047b36c5e823e7310488a6adc91fdd"
diff --git a/pyproject.toml b/pyproject.toml
index 8290789..58d7d35 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,18 +60,18 @@ av = "*"
 
 # ---------------------------------------------------------------------------
 # --- NVIDIA GPU configuration ---
-torch = [
-  { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a", markers = "sys_platform == 'win32'" },
-  { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c", markers = "sys_platform == 'linux'" }
-]
-torchaudio = [
-  { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2", markers = "sys_platform == 'win32'" },
-  { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4", markers = "sys_platform == 'linux'" }
-]
+# torch = [
+  # { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a", markers = "sys_platform == 'win32'" },
+  # { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c", markers = "sys_platform == 'linux'" }
+# ]
+# torchaudio = [
+  # { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2", markers = "sys_platform == 'win32'" },
+  # { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4", markers = "sys_platform == 'linux'" }
+# ]
 # --- AMD GPU configuration ---
-# torch = "2.4.1"
-# torchaudio = "2.4.1"
-# torch-directml = "^0.2.5.dev240914"
+torch = "2.4.1"
+torchaudio = "2.4.1"
+torch-directml = "^0.2.5.dev240914"
 # ---------------------------------------------------------------------------
 
 [tool.poetry.group.dev.dependencies]
diff --git a/run.sh b/run.sh
deleted file mode 100755
index f239307..0000000
--- a/run.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/sh
-
-if [ "$(uname)" = "Darwin" ]; then
-  # macOS specific env:
-  export PYTORCH_ENABLE_MPS_FALLBACK=1
-  export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
-elif [ "$(uname)" != "Linux" ]; then
-  echo "Unsupported operating system."
-  exit 1
-fi
-
-if [ -d ".venv" ]; then
-  echo "Activate venv..."
-  . .venv/bin/activate
-else
-  echo "Create venv..."
-  requirements_file="requirements.txt"
-
-  # Check if Python 3.8 is installed
-  if ! command -v python3.8 >/dev/null 2>&1 || pyenv versions --bare | grep -q "3.8"; then
-    echo "Python 3 not found. Attempting to install 3.8..."
-    if [ "$(uname)" = "Darwin" ] && command -v brew >/dev/null 2>&1; then
-      brew install python@3.8
-    elif [ "$(uname)" = "Linux" ] && command -v apt-get >/dev/null 2>&1; then
-      sudo apt-get update
-      sudo apt-get install python3.8
-    else
-      echo "Please install Python 3.8 manually."
-      exit 1
-    fi
-  fi
-
-  python3.8 -m venv .venv
-  . .venv/bin/activate
-
-  # Check if required packages are installed and install them if not
-  if [ -f "${requirements_file}" ]; then
-    installed_packages=$(python3.8 -m pip freeze)
-    while IFS= read -r package; do
-      expr "${package}" : "^#.*" > /dev/null && continue
-      package_name=$(echo "${package}" | sed 's/[<>=!].*//')
-      if ! echo "${installed_packages}" | grep -q "${package_name}"; then
-        echo "${package_name} not found. Attempting to install..."
-        python3.8 -m pip install --upgrade "${package}"
-      fi
-    done < "${requirements_file}"
-  else
-    echo "${requirements_file} not found. Please ensure the requirements file with required packages exists."
-    exit 1
-  fi
-fi
-
-# Download models
-chmod +x tools/dlmodels.sh
-./tools/dlmodels.sh
-
-if [ $? -ne 0 ]; then
-  exit 1
-fi
-
-# Run the main script
-python3.8 infer-web.py --pycmd python3.8
diff --git a/tools/dlmodels.bat b/tools/dlmodels.bat
deleted file mode 100644
index b83825a..0000000
--- a/tools/dlmodels.bat
+++ /dev/null
@@ -1,362 +0,0 @@
-@echo off && chcp 65001
-
-echo working dir is %cd%
-echo downloading requirement aria2 check.
-echo=
-dir /a:d/b | findstr "aria2" > flag.txt
-findstr "aria2" flag.txt >nul
-if %errorlevel% ==0 (
-    echo aria2 checked.
-    echo=
-) else (
-    echo failed. please downloading aria2 from webpage!
-    echo unzip it and put in this directory!
-    timeout /T 5
-    start https://github.com/aria2/aria2/releases/tag/release-1.36.0
-    echo=
-    goto end
-)
-
-echo envfiles checking start.
-echo=
-
-for /f %%x in ('findstr /i /c:"aria2" "flag.txt"') do (set aria2=%%x)&goto endSch
-:endSch
-
-set d32=f0D32k.pth
-set d40=f0D40k.pth
-set d48=f0D48k.pth
-set g32=f0G32k.pth
-set g40=f0G40k.pth
-set g48=f0G48k.pth
-
-set d40v2=f0D40k.pth
-set g40v2=f0G40k.pth
-
-set dld32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth
-set dld40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth
-set dld48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth
-set dlg32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth
-set dlg40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth
-set dlg48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth
-
-set dld40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth
-set dlg40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth
-
-set hp2_all=HP2_all_vocals.pth
-set hp3_all=HP3_all_vocals.pth
-set hp5_only=HP5_only_main_vocal.pth
-set VR_DeEchoAggressive=VR-DeEchoAggressive.pth
-set VR_DeEchoDeReverb=VR-DeEchoDeReverb.pth
-set VR_DeEchoNormal=VR-DeEchoNormal.pth
-set onnx_dereverb=vocals.onnx
-
-set dlhp2_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth
-set dlhp3_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth
-set dlhp5_only=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth
-set dlVR_DeEchoAggressive=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth
-set dlVR_DeEchoDeReverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth
-set dlVR_DeEchoNormal=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth
-set dlonnx_dereverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx
-
-set hb=hubert_base.pt
-
-set dlhb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt
-
-set rmvpe=rmvpe.pt
-set dlrmvpe=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt
-
-echo dir check start.
-echo=
-
-if exist "%~dp0assets\pretrained" (
-        echo dir .\assets\pretrained checked.
-    ) else (
-        echo failed. generating dir .\assets\pretrained.
-        mkdir pretrained
-    )
-if exist "%~dp0assets\pretrained_v2" (
-        echo dir .\assets\pretrained_v2 checked.
-    ) else (
-        echo failed. generating dir .\assets\pretrained_v2.
-        mkdir pretrained_v2
-    )    
-if exist "%~dp0assets\uvr5_weights" (
-        echo dir .\assets\uvr5_weights checked.
-    ) else (
-        echo failed. generating dir .\assets\uvr5_weights.
-        mkdir uvr5_weights
-    )
-if exist "%~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy" (
-        echo dir .\assets\uvr5_weights\onnx_dereverb_By_FoxJoy checked.
-    ) else (
-        echo failed. generating dir .\assets\uvr5_weights\onnx_dereverb_By_FoxJoy.
-        mkdir uvr5_weights\onnx_dereverb_By_FoxJoy
-    )    
-
-echo=
-echo dir check finished.
-
-echo=
-echo required files check start.
-
-echo checking D32k.pth
-if exist "%~dp0assets\pretrained\D32k.pth" (
-        echo D32k.pth in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d %~dp0assets\pretrained -o D32k.pth
-        if exist "%~dp0assets\pretrained\D32k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking D40k.pth
-if exist "%~dp0assets\pretrained\D40k.pth" (
-        echo D40k.pth in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d %~dp0assets\pretrained -o D40k.pth
-        if exist "%~dp0assets\pretrained\D40k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking D40k.pth
-if exist "%~dp0assets\pretrained_v2\D40k.pth" (
-        echo D40k.pth in .\assets\pretrained_v2 checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d %~dp0assets\pretrained_v2 -o D40k.pth
-        if exist "%~dp0assets\pretrained_v2\D40k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )    
-echo checking D48k.pth
-if exist "%~dp0assets\pretrained\D48k.pth" (
-        echo D48k.pth in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d %~dp0assets\pretrained -o D48k.pth
-        if exist "%~dp0assets\pretrained\D48k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking G32k.pth
-if exist "%~dp0assets\pretrained\G32k.pth" (
-        echo G32k.pth in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d %~dp0assets\pretrained -o G32k.pth
-        if exist "%~dp0assets\pretrained\G32k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking G40k.pth
-if exist "%~dp0assets\pretrained\G40k.pth" (
-        echo G40k.pth in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d %~dp0assets\pretrained -o G40k.pth
-        if exist "%~dp0assets\pretrained\G40k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking G40k.pth
-if exist "%~dp0assets\pretrained_v2\G40k.pth" (
-        echo G40k.pth in .\assets\pretrained_v2 checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d %~dp0assets\pretrained_v2 -o G40k.pth
-        if exist "%~dp0assets\pretrained_v2\G40k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )    
-echo checking G48k.pth
-if exist "%~dp0assets\pretrained\G48k.pth" (
-        echo G48k.pth in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d %~dp0assets\pretrained -o G48k.pth
-        if exist "%~dp0assets\pretrained\G48k.pth" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-
-echo checking %d32%
-if exist "%~dp0assets\pretrained\%d32%" (
-        echo %d32% in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld32% -d %~dp0assets\pretrained -o %d32%
-        if exist "%~dp0assets\pretrained\%d32%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %d40%
-if exist "%~dp0assets\pretrained\%d40%" (
-        echo %d40% in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40% -d %~dp0assets\pretrained -o %d40%
-        if exist "%~dp0assets\pretrained\%d40%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %d40v2%
-if exist "%~dp0assets\pretrained_v2\%d40v2%" (
-        echo %d40v2% in .\assets\pretrained_v2 checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40v2% -d %~dp0assets\pretrained_v2 -o %d40v2%
-        if exist "%~dp0assets\pretrained_v2\%d40v2%" (echo download successful.) else (echo please try again!
-        echo=)
-    )    
-echo checking %d48%
-if exist "%~dp0assets\pretrained\%d48%" (
-        echo %d48% in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld48% -d %~dp0assets\pretrained -o %d48%
-        if exist "%~dp0assets\pretrained\%d48%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %g32%
-if exist "%~dp0assets\pretrained\%g32%" (
-        echo %g32% in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg32% -d %~dp0assets\pretrained -o %g32%
-        if exist "%~dp0assets\pretrained\%g32%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %g40%
-if exist "%~dp0assets\pretrained\%g40%" (
-        echo %g40% in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40% -d %~dp0assets\pretrained -o %g40%
-        if exist "%~dp0assets\pretrained\%g40%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %g40v2%
-if exist "%~dp0assets\pretrained_v2\%g40v2%" (
-        echo %g40v2% in .\assets\pretrained_v2 checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40v2% -d %~dp0assets\pretrained_v2 -o %g40v2%
-        if exist "%~dp0assets\pretrained_v2\%g40v2%" (echo download successful.) else (echo please try again!
-        echo=)
-    )    
-echo checking %g48%
-if exist "%~dp0assets\pretrained\%g48%" (
-        echo %g48% in .\assets\pretrained checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg48% -d %~dp0assets\pretrained -o %g48%
-        if exist "%~dp0assets\pretrained\%g48%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-
-echo checking %hp2_all%
-if exist "%~dp0assets\uvr5_weights\%hp2_all%" (
-        echo %hp2_all% in .\assets\uvr5_weights checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp2_all% -d %~dp0assets\uvr5_weights -o %hp2_all%
-        if exist "%~dp0assets\uvr5_weights\%hp2_all%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %hp3_all%
-if exist "%~dp0assets\uvr5_weights\%hp3_all%" (
-        echo %hp3_all% in .\assets\uvr5_weights checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp3_all% -d %~dp0assets\uvr5_weights -o %hp3_all%
-        if exist "%~dp0assets\uvr5_weights\%hp3_all%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %hp5_only%
-if exist "%~dp0assets\uvr5_weights\%hp5_only%" (
-        echo %hp5_only% in .\assets\uvr5_weights checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp5_only% -d %~dp0assets\uvr5_weights -o %hp5_only%
-        if exist "%~dp0assets\uvr5_weights\%hp5_only%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %VR_DeEchoAggressive%
-if exist "%~dp0assets\uvr5_weights\%VR_DeEchoAggressive%" (
-        echo %VR_DeEchoAggressive% in .\assets\uvr5_weights checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoAggressive% -d %~dp0assets\uvr5_weights -o %VR_DeEchoAggressive%
-        if exist "%~dp0assets\uvr5_weights\%VR_DeEchoAggressive%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %VR_DeEchoDeReverb%
-if exist "%~dp0assets\uvr5_weights\%VR_DeEchoDeReverb%" (
-        echo %VR_DeEchoDeReverb% in .\assets\uvr5_weights checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoDeReverb% -d %~dp0assets\uvr5_weights -o %VR_DeEchoDeReverb%
-        if exist "%~dp0assets\uvr5_weights\%VR_DeEchoDeReverb%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %VR_DeEchoNormal%
-if exist "%~dp0assets\uvr5_weights\%VR_DeEchoNormal%" (
-        echo %VR_DeEchoNormal% in .\assets\uvr5_weights checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoNormal% -d %~dp0assets\uvr5_weights -o %VR_DeEchoNormal%
-        if exist "%~dp0assets\uvr5_weights\%VR_DeEchoNormal%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-echo checking %onnx_dereverb%
-if exist "%~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" (
-        echo %onnx_dereverb% in .\assets\uvr5_weights\onnx_dereverb_By_FoxJoy checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlonnx_dereverb% -d %~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy -o %onnx_dereverb%
-        if exist "%~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" (echo download successful.) else (echo please try again!
-        echo=)
-    )    
-
-echo checking %hb%
-if exist "%~dp0assets\hubert\%hb%" (
-        echo %hb% in .\assets\hubert checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhb% -d %~dp0assets\hubert\ -o %hb%
-        if exist "%~dp0assets\hubert\%hb%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-
-echo checking %rmvpe%
-if exist "%~dp0assets\rmvpe\%rmvpe%" (
-        echo %rmvpe% in .\assets\rmvpe checked.
-        echo=
-    ) else (
-        echo failed. starting download from huggingface.
-        %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlrmvpe% -d %~dp0assets\rmvpe\ -o %rmvpe%
-        if exist "%~dp0assets\rmvpe\%rmvpe%" (echo download successful.) else (echo please try again!
-        echo=)
-    )
-
-echo required files check finished.
-echo envfiles check complete.
-pause
-:end
-del flag.txt
diff --git a/tools/dlmodels.sh b/tools/dlmodels.sh
deleted file mode 100755
index 9482db4..0000000
--- a/tools/dlmodels.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/bin/sh
-
-printf "working dir is %s\n" "$PWD"
-echo "downloading requirement aria2 check."
-
-if command -v aria2c > /dev/null 2>&1
-then
-    echo "aria2 command found"
-else
-    echo "failed. please install aria2"
-    exit 1
-fi
-
-echo "dir check start."
-
-check_dir() {
-    [ -d "$1" ] && printf "dir %s checked\n" "$1" || \
-    printf "failed. generating dir %s\n" "$1" && mkdir -p "$1"
-}
-
-check_dir "./assets/pretrained"
-check_dir "./assets/pretrained_v2"
-check_dir "./assets/uvr5_weights"
-check_dir "./assets/uvr5_weights/onnx_dereverb_By_FoxJoy"
-
-echo "dir check finished."
-
-echo "required files check start."
-check_file_pretrained() {
-  printf "checking %s\n" "$2"
-  if [ -f "./assets/""$1""/""$2""" ]; then
-      printf "%s in ./assets/%s checked.\n" "$2" "$1" 
-  else
-      echo failed. starting download from huggingface.
-      if command -v aria2c > /dev/null 2>&1; then
-          aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"$1"/"$2" -d ./assets/"$1" -o "$2"
-          [ -f "./assets/""$1""/""$2""" ] && echo "download successful." || { echo "please try again!" && exit 1; }
-      else
-          echo "aria2c command not found. Please install aria2c and try again."
-          exit 1
-      fi
-  fi
-}
-
-check_file_special() {
-  printf "checking %s\n" "$2"
-  if [ -f "./assets/""$1""/""$2""" ]; then
-      printf "%s in ./assets/%s checked.\n" "$2" "$1" 
-  else
-      echo failed. starting download from huggingface.
-      if command -v aria2c > /dev/null 2>&1; then
-          aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"$2" -d ./assets/"$1" -o "$2"
-          [ -f "./assets/""$1""/""$2""" ] && echo "download successful." || { echo "please try again!" && exit 1; }
-      else
-          echo "aria2c command not found. Please install aria2c and try again."
-          exit 1
-      fi
-  fi
-}
-
-check_file_pretrained pretrained D32k.pth
-check_file_pretrained pretrained D40k.pth
-check_file_pretrained pretrained D48k.pth
-check_file_pretrained pretrained G32k.pth
-check_file_pretrained pretrained G40k.pth
-check_file_pretrained pretrained G48k.pth
-check_file_pretrained pretrained_v2 f0D40k.pth
-check_file_pretrained pretrained_v2 f0G40k.pth
-check_file_pretrained pretrained_v2 D40k.pth
-check_file_pretrained pretrained_v2 G40k.pth
-check_file_pretrained uvr5_weights HP2_all_vocals.pth
-check_file_pretrained uvr5_weights HP3_all_vocals.pth
-check_file_pretrained uvr5_weights HP5_only_main_vocal.pth
-check_file_pretrained uvr5_weights VR-DeEchoAggressive.pth
-check_file_pretrained uvr5_weights VR-DeEchoDeReverb.pth
-check_file_pretrained uvr5_weights VR-DeEchoNormal.pth
-check_file_pretrained uvr5_weights "onnx_dereverb_By_FoxJoy/vocals.onnx"
-check_file_special rmvpe rmvpe.pt
-check_file_special hubert hubert_base.pt
-
-echo "required files check finished."
diff --git a/tools/rvc_for_realtime.py b/tools/rvc_for_realtime.py
deleted file mode 100644
index 9a7399c..0000000
--- a/tools/rvc_for_realtime.py
+++ /dev/null
@@ -1,445 +0,0 @@
-from io import BytesIO
-import os
-import pickle
-import sys
-import traceback
-from infer.lib import jit
-from infer.lib.jit.get_synthesizer import get_synthesizer
-from time import time as ttime
-import fairseq
-import faiss
-import numpy as np
-import parselmouth
-import pyworld
-import scipy.signal as signal
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torchcrepe
-
-from infer.lib.infer_pack.models import (
-    SynthesizerTrnMs256NSFsid,
-    SynthesizerTrnMs256NSFsid_nono,
-    SynthesizerTrnMs768NSFsid,
-    SynthesizerTrnMs768NSFsid_nono,
-)
-
-now_dir = os.getcwd()
-sys.path.append(now_dir)
-from multiprocessing import Manager as M
-
-from configs.config import Config
-
-# config = Config()
-
-mm = M()
-
-
-def printt(strr, *args):
-    if len(args) == 0:
-        print(strr)
-    else:
-        print(strr % args)
-
-
-# config.device=torch.device("cpu")########强制cpu测试
-# config.is_half=False########强制cpu测试
-class RVC:
-    def __init__(
-        self,
-        key,
-        pth_path,
-        index_path,
-        index_rate,
-        n_cpu,
-        inp_q,
-        opt_q,
-        config: Config,
-        last_rvc=None,
-    ) -> None:
-        """
-        初始化
-        """
-        try:
-            if config.dml == True:
-
-                def forward_dml(ctx, x, scale):
-                    ctx.scale = scale
-                    res = x.clone().detach()
-                    return res
-
-                fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
-            # global config
-            self.config = config
-            self.inp_q = inp_q
-            self.opt_q = opt_q
-            # device="cpu"########强制cpu测试
-            self.device = config.device
-            self.f0_up_key = key
-            self.f0_min = 50
-            self.f0_max = 1100
-            self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
-            self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
-            self.n_cpu = n_cpu
-            self.use_jit = self.config.use_jit
-            self.is_half = config.is_half
-
-            if index_rate != 0:
-                self.index = faiss.read_index(index_path)
-                self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
-                printt("Index search enabled")
-            self.pth_path: str = pth_path
-            self.index_path = index_path
-            self.index_rate = index_rate
-            self.cache_pitch: torch.Tensor = torch.zeros(
-                1024, device=self.device, dtype=torch.long
-            )
-            self.cache_pitchf = torch.zeros(
-                1024, device=self.device, dtype=torch.float32
-            )
-
-            if last_rvc is None:
-                models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
-                    ["assets/hubert/hubert_base.pt"],
-                    suffix="",
-                )
-                hubert_model = models[0]
-                hubert_model = hubert_model.to(self.device)
-                if self.is_half:
-                    hubert_model = hubert_model.half()
-                else:
-                    hubert_model = hubert_model.float()
-                hubert_model.eval()
-                self.model = hubert_model
-            else:
-                self.model = last_rvc.model
-
-            self.net_g: nn.Module = None
-
-            def set_default_model():
-                self.net_g, cpt = get_synthesizer(self.pth_path, self.device)
-                self.tgt_sr = cpt["config"][-1]
-                cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
-                self.if_f0 = cpt.get("f0", 1)
-                self.version = cpt.get("version", "v1")
-                if self.is_half:
-                    self.net_g = self.net_g.half()
-                else:
-                    self.net_g = self.net_g.float()
-
-            def set_jit_model():
-                jit_pth_path = self.pth_path.rstrip(".pth")
-                jit_pth_path += ".half.jit" if self.is_half else ".jit"
-                reload = False
-                if str(self.device) == "cuda":
-                    self.device = torch.device("cuda:0")
-                if os.path.exists(jit_pth_path):
-                    cpt = jit.load(jit_pth_path)
-                    model_device = cpt["device"]
-                    if model_device != str(self.device):
-                        reload = True
-                else:
-                    reload = True
-
-                if reload:
-                    cpt = jit.synthesizer_jit_export(
-                        self.pth_path,
-                        "script",
-                        None,
-                        device=self.device,
-                        is_half=self.is_half,
-                    )
-
-                self.tgt_sr = cpt["config"][-1]
-                self.if_f0 = cpt.get("f0", 1)
-                self.version = cpt.get("version", "v1")
-                self.net_g = torch.jit.load(
-                    BytesIO(cpt["model"]), map_location=self.device
-                )
-                self.net_g.infer = self.net_g.forward
-                self.net_g.eval().to(self.device)
-
-            def set_synthesizer():
-                if self.use_jit and not config.dml:
-                    if self.is_half and "cpu" in str(self.device):
-                        printt(
-                            "Use default Synthesizer model. \
-                                    Jit is not supported on the CPU for half floating point"
-                        )
-                        set_default_model()
-                    else:
-                        set_jit_model()
-                else:
-                    set_default_model()
-
-            if last_rvc is None or last_rvc.pth_path != self.pth_path:
-                set_synthesizer()
-            else:
-                self.tgt_sr = last_rvc.tgt_sr
-                self.if_f0 = last_rvc.if_f0
-                self.version = last_rvc.version
-                self.is_half = last_rvc.is_half
-                if last_rvc.use_jit != self.use_jit:
-                    set_synthesizer()
-                else:
-                    self.net_g = last_rvc.net_g
-
-            if last_rvc is not None and hasattr(last_rvc, "model_rmvpe"):
-                self.model_rmvpe = last_rvc.model_rmvpe
-            if last_rvc is not None and hasattr(last_rvc, "model_fcpe"):
-                self.device_fcpe = last_rvc.device_fcpe
-                self.model_fcpe = last_rvc.model_fcpe
-        except:
-            printt(traceback.format_exc())
-
-    def change_key(self, new_key):
-        self.f0_up_key = new_key
-
-    def change_index_rate(self, new_index_rate):
-        if new_index_rate != 0 and self.index_rate == 0:
-            self.index = faiss.read_index(self.index_path)
-            self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
-            printt("Index search enabled")
-        self.index_rate = new_index_rate
-
-    def get_f0_post(self, f0):
-        if not torch.is_tensor(f0):
-            f0 = torch.from_numpy(f0)
-        f0 = f0.float().to(self.device).squeeze()
-        f0_mel = 1127 * torch.log(1 + f0 / 700)
-        f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / (
-            self.f0_mel_max - self.f0_mel_min
-        ) + 1
-        f0_mel[f0_mel <= 1] = 1
-        f0_mel[f0_mel > 255] = 255
-        f0_coarse = torch.round(f0_mel).long()
-        return f0_coarse, f0
-
-    def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
-        n_cpu = int(n_cpu)
-        if method == "crepe":
-            return self.get_f0_crepe(x, f0_up_key)
-        if method == "rmvpe":
-            return self.get_f0_rmvpe(x, f0_up_key)
-        if method == "fcpe":
-            return self.get_f0_fcpe(x, f0_up_key)
-        x = x.cpu().numpy()
-        if method == "pm":
-            p_len = x.shape[0] // 160 + 1
-            f0_min = 65
-            l_pad = int(np.ceil(1.5 / f0_min * 16000))
-            r_pad = l_pad + 1
-            s = parselmouth.Sound(np.pad(x, (l_pad, r_pad)), 16000).to_pitch_ac(
-                time_step=0.01,
-                voicing_threshold=0.6,
-                pitch_floor=f0_min,
-                pitch_ceiling=1100,
-            )
-            assert np.abs(s.t1 - 1.5 / f0_min) < 0.001
-            f0 = s.selected_array["frequency"]
-            if len(f0) < p_len:
-                f0 = np.pad(f0, (0, p_len - len(f0)))
-            f0 = f0[:p_len]
-            f0 *= pow(2, f0_up_key / 12)
-            return self.get_f0_post(f0)
-        if n_cpu == 1:
-            f0, t = pyworld.harvest(
-                x.astype(np.double),
-                fs=16000,
-                f0_ceil=1100,
-                f0_floor=50,
-                frame_period=10,
-            )
-            f0 = signal.medfilt(f0, 3)
-            f0 *= pow(2, f0_up_key / 12)
-            return self.get_f0_post(f0)
-        f0bak = np.zeros(x.shape[0] // 160 + 1, dtype=np.float64)
-        length = len(x)
-        part_length = 160 * ((length // 160 - 1) // n_cpu + 1)
-        n_cpu = (length // 160 - 1) // (part_length // 160) + 1
-        ts = ttime()
-        res_f0 = mm.dict()
-        for idx in range(n_cpu):
-            tail = part_length * (idx + 1) + 320
-            if idx == 0:
-                self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts))
-            else:
-                self.inp_q.put(
-                    (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts)
-                )
-        while 1:
-            res_ts = self.opt_q.get()
-            if res_ts == ts:
-                break
-        f0s = [i[1] for i in sorted(res_f0.items(), key=lambda x: x[0])]
-        for idx, f0 in enumerate(f0s):
-            if idx == 0:
-                f0 = f0[:-3]
-            elif idx != n_cpu - 1:
-                f0 = f0[2:-3]
-            else:
-                f0 = f0[2:]
-            f0bak[part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]] = (
-                f0
-            )
-        f0bak = signal.medfilt(f0bak, 3)
-        f0bak *= pow(2, f0_up_key / 12)
-        return self.get_f0_post(f0bak)
-
-    def get_f0_crepe(self, x, f0_up_key):
-        if "privateuseone" in str(
-            self.device
-        ):  ###不支持dml，cpu又太慢用不成，拿fcpe顶替
-            return self.get_f0(x, f0_up_key, 1, "fcpe")
-        # printt("using crepe,device:%s"%self.device)
-        f0, pd = torchcrepe.predict(
-            x.unsqueeze(0).float(),
-            16000,
-            160,
-            self.f0_min,
-            self.f0_max,
-            "full",
-            batch_size=512,
-            # device=self.device if self.device.type!="privateuseone" else "cpu",###crepe不用半精度全部是全精度所以不愁###cpu延迟高到没法用
-            device=self.device,
-            return_periodicity=True,
-        )
-        pd = torchcrepe.filter.median(pd, 3)
-        f0 = torchcrepe.filter.mean(f0, 3)
-        f0[pd < 0.1] = 0
-        f0 *= pow(2, f0_up_key / 12)
-        return self.get_f0_post(f0)
-
-    def get_f0_rmvpe(self, x, f0_up_key):
-        if hasattr(self, "model_rmvpe") == False:
-            from infer.lib.rmvpe import RMVPE
-
-            printt("Loading rmvpe model")
-            self.model_rmvpe = RMVPE(
-                "assets/rmvpe/rmvpe.pt",
-                is_half=self.is_half,
-                device=self.device,
-                use_jit=self.config.use_jit,
-            )
-        f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
-        f0 *= pow(2, f0_up_key / 12)
-        return self.get_f0_post(f0)
-
-    def get_f0_fcpe(self, x, f0_up_key):
-        if hasattr(self, "model_fcpe") == False:
-            from torchfcpe import spawn_bundled_infer_model
-
-            printt("Loading fcpe model")
-            if "privateuseone" in str(self.device):
-                self.device_fcpe = "cpu"
-            else:
-                self.device_fcpe = self.device
-            self.model_fcpe = spawn_bundled_infer_model(self.device_fcpe)
-        f0 = self.model_fcpe.infer(
-            x.to(self.device_fcpe).unsqueeze(0).float(),
-            sr=16000,
-            decoder_mode="local_argmax",
-            threshold=0.006,
-        )
-        f0 *= pow(2, f0_up_key / 12)
-        return self.get_f0_post(f0)
-
-    def infer(
-        self,
-        input_wav: torch.Tensor,
-        block_frame_16k,
-        skip_head,
-        return_length,
-        f0method,
-    ) -> np.ndarray:
-        t1 = ttime()
-        with torch.no_grad():
-            if self.config.is_half:
-                feats = input_wav.half().view(1, -1)
-            else:
-                feats = input_wav.float().view(1, -1)
-            padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
-            inputs = {
-                "source": feats,
-                "padding_mask": padding_mask,
-                "output_layer": 9 if self.version == "v1" else 12,
-            }
-            logits = self.model.extract_features(**inputs)
-            feats = (
-                self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
-            )
-            feats = torch.cat((feats, feats[:, -1:, :]), 1)
-        t2 = ttime()
-        try:
-            if hasattr(self, "index") and self.index_rate != 0:
-                npy = feats[0][skip_head // 2 :].cpu().numpy().astype("float32")
-                score, ix = self.index.search(npy, k=8)
-                if (ix >= 0).all():
-                    weight = np.square(1 / score)
-                    weight /= weight.sum(axis=1, keepdims=True)
-                    npy = np.sum(
-                        self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1
-                    )
-                    if self.config.is_half:
-                        npy = npy.astype("float16")
-                    feats[0][skip_head // 2 :] = (
-                        torch.from_numpy(npy).unsqueeze(0).to(self.device)
-                        * self.index_rate
-                        + (1 - self.index_rate) * feats[0][skip_head // 2 :]
-                    )
-                else:
-                    printt(
-                        "Invalid index. You MUST use added_xxxx.index but not trained_xxxx.index!"
-                    )
-            else:
-                printt("Index search FAILED or disabled")
-        except:
-            traceback.print_exc()
-            printt("Index search FAILED")
-        t3 = ttime()
-        p_len = input_wav.shape[0] // 160
-        if self.if_f0 == 1:
-            f0_extractor_frame = block_frame_16k + 800
-            if f0method == "rmvpe":
-                f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160
-            pitch, pitchf = self.get_f0(
-                input_wav[-f0_extractor_frame:], self.f0_up_key, self.n_cpu, f0method
-            )
-            shift = block_frame_16k // 160
-            self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
-            self.cache_pitchf[:-shift] = self.cache_pitchf[shift:].clone()
-            self.cache_pitch[4 - pitch.shape[0] :] = pitch[3:-1]
-            self.cache_pitchf[4 - pitch.shape[0] :] = pitchf[3:-1]
-            cache_pitch = self.cache_pitch[None, -p_len:]
-            cache_pitchf = self.cache_pitchf[None, -p_len:]
-        t4 = ttime()
-        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
-        feats = feats[:, :p_len, :]
-        p_len = torch.LongTensor([p_len]).to(self.device)
-        sid = torch.LongTensor([0]).to(self.device)
-        skip_head = torch.LongTensor([skip_head])
-        return_length = torch.LongTensor([return_length])
-        with torch.no_grad():
-            if self.if_f0 == 1:
-                infered_audio, _, _ = self.net_g.infer(
-                    feats,
-                    p_len,
-                    cache_pitch,
-                    cache_pitchf,
-                    sid,
-                    skip_head,
-                    return_length,
-                )
-            else:
-                infered_audio, _, _ = self.net_g.infer(
-                    feats, p_len, sid, skip_head, return_length
-                )
-        t5 = ttime()
-        printt(
-            "Spent time: fea = %.3fs, index = %.3fs, f0 = %.3fs, model = %.3fs",
-            t2 - t1,
-            t3 - t2,
-            t4 - t3,
-            t5 - t4,
-        )
-        return infered_audio.squeeze().float()
diff --git a/venv.sh b/venv.sh
deleted file mode 100755
index 577283b..0000000
--- a/venv.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-
-python3.8 -m venv .venv