diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb deleted file mode 100644 index b38d8d2..0000000 --- a/Retrieval_based_Voice_Conversion_WebUI.ipynb +++ /dev/null @@ -1,403 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) Training notebook" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "ZFFCx5J80SGa" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GmFP6bN9dvOq" - }, - "outputs": [], - "source": [ - "# @title 查看显卡\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jwu07JgqoFON" - }, - "outputs": [], - "source": [ - "# @title 挂载谷歌云盘\n", - "\n", - "from google.colab import drive\n", - "\n", - "drive.mount(\"/content/drive\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wjddIFr1oS3W" - }, - "outputs": [], - "source": [ - "# @title 安装依赖\n", - "!apt-get -y install build-essential python3-dev ffmpeg\n", - "!pip3 install --upgrade setuptools wheel\n", - "!pip3 install --upgrade pip\n", - "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ge_97mfpgqTm" - }, - "outputs": [], - "source": [ - "# @title 克隆仓库\n", - "\n", - "!git clone --depth=1 -b stable https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n", - "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", - "!mkdir -p pretrained uvr5_weights" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BLDEZADkvlw1" - }, - "outputs": [], - "source": [ - "# @title 更新仓库(一般无需执行)\n", - "!git pull" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pqE0PrnuRqI2" - }, - "outputs": [], - "source": [ - "# @title 安装aria2\n", - "!apt -y install -qq aria2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UG3XpUwEomUz" - }, - "outputs": [], - "source": [ - "# @title 下载底模\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HugjmZqZRuiF" - }, - "outputs": [], - "source": [ - "# @title 下载人声分离模型\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2RCaT9FTR0ej" - }, - "outputs": [], - "source": [ - "# @title 下载hubert_base\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# @title #下载rmvpe模型\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o rmvpe.pt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Mwk7Q0Loqzjx" - }, - "outputs": [], - "source": [ - "# @title 从谷歌云盘加载打包好的数据集到/content/dataset\n", - "\n", - "# @markdown 数据集位置\n", - "DATASET = (\n", - " \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" # @param {type:\"string\"}\n", - ")\n", - "\n", - "!mkdir -p /content/dataset\n", - "!unzip -d /content/dataset -B {DATASET}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PDlFxWHWEynD" - }, - "outputs": [], - "source": [ - "# @title 重命名数据集中的重名文件\n", - "!ls -a /content/dataset/\n", - "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7vh6vphDwO0b" - }, - "outputs": [], - "source": [ - "# @title 启动web\n", - "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", - "# %load_ext tensorboard\n", - "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", - "!python3 infer-web.py --colab --pycmd python3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FgJuNeAwx5Y_" - }, - "outputs": [], - "source": [ - "# @title 手动将训练后的模型文件备份到谷歌云盘\n", - "# @markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", - "\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 模型epoch\n", - "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", - "\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n", - "\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OVQoLQJXS7WX" - }, - "outputs": [], - "source": [ - "# @title 从谷歌云盘恢复pth\n", - "# @markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", - "\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 模型epoch\n", - "MODELEPOCH = 7500 # @param {type:\"integer\"}\n", - "\n", - "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", - "\n", - "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", - "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", - "!cp /content/drive/MyDrive/*.index /content/\n", - "!cp /content/drive/MyDrive/*.npy /content/\n", - "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZKAyuKb9J6dz" - }, - "outputs": [], - "source": [ - "# @title 手动预处理(不推荐)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 采样率\n", - "BITRATE = 48000 # @param {type:\"integer\"}\n", - "# @markdown 使用的进程数\n", - "THREADCOUNT = 8 # @param {type:\"integer\"}\n", - "\n", - "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CrxJqzAUKmPJ" - }, - "outputs": [], - "source": [ - "# @title 手动提取特征(不推荐)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 使用的进程数\n", - "THREADCOUNT = 8 # @param {type:\"integer\"}\n", - "# @markdown 音高提取算法\n", - "ALGO = \"harvest\" # @param {type:\"string\"}\n", - "\n", - "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", - "\n", - "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IMLPLKOaKj58" - }, - "outputs": [], - "source": [ - "# @title 手动训练(不推荐)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 使用的GPU\n", - "USEGPU = \"0\" # @param {type:\"string\"}\n", - "# @markdown 批大小\n", - "BATCHSIZE = 32 # @param {type:\"integer\"}\n", - "# @markdown 停止的epoch\n", - "MODELEPOCH = 3200 # @param {type:\"integer\"}\n", - "# @markdown 保存epoch间隔\n", - "EPOCHSAVE = 100 # @param {type:\"integer\"}\n", - "# @markdown 采样率\n", - "MODELSAMPLE = \"48k\" # @param {type:\"string\"}\n", - "# @markdown 是否缓存训练集\n", - "CACHEDATA = 1 # @param {type:\"integer\"}\n", - "# @markdown 是否仅保存最新的ckpt文件\n", - "ONLYLATEST = 0 # @param {type:\"integer\"}\n", - "\n", - "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "haYA81hySuDl" - }, - "outputs": [], - "source": [ - "# @title 删除其它pth,只留选中的(慎点,仔细看代码)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 选中模型epoch\n", - "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", - "\n", - "!echo \"备份选中的模型。。。\"\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", - "\n", - "!echo \"正在删除。。。\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", - "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n", - "\n", - "!echo \"恢复选中的模型。。。\"\n", - "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", - "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", - "\n", - "!echo \"删除完成\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QhSiPTVPoIRh" - }, - "outputs": [], - "source": [ - "# @title 清除项目下所有文件,只留选中的模型(慎点,仔细看代码)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 选中模型epoch\n", - "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", - "\n", - "!echo \"备份选中的模型。。。\"\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", - "\n", - "!echo \"正在删除。。。\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", - "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n", - "\n", - "!echo \"恢复选中的模型。。。\"\n", - "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", - "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", - "\n", - "!echo \"删除完成\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "private_outputs": true, - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb b/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb deleted file mode 100644 index 0cad19f..0000000 --- a/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb +++ /dev/null @@ -1,422 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) Training notebook" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "ZFFCx5J80SGa" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GmFP6bN9dvOq" - }, - "outputs": [], - "source": [ - "# @title #查看显卡\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jwu07JgqoFON" - }, - "outputs": [], - "source": [ - "# @title 挂载谷歌云盘\n", - "\n", - "from google.colab import drive\n", - "\n", - "drive.mount(\"/content/drive\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wjddIFr1oS3W" - }, - "outputs": [], - "source": [ - "# @title #安装依赖\n", - "!apt-get -y install build-essential python3-dev ffmpeg\n", - "!pip3 install --upgrade setuptools wheel\n", - "!pip3 install --upgrade pip\n", - "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ge_97mfpgqTm" - }, - "outputs": [], - "source": [ - "# @title #克隆仓库\n", - "\n", - "!mkdir Retrieval-based-Voice-Conversion-WebUI\n", - "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", - "!git init\n", - "!git remote add origin https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git\n", - "!git fetch origin cfd984812804ddc9247d65b14c82cd32e56c1133 --depth=1\n", - "!git reset --hard FETCH_HEAD" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BLDEZADkvlw1" - }, - "outputs": [], - "source": [ - "# @title #更新仓库(一般无需执行)\n", - "!git pull" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pqE0PrnuRqI2" - }, - "outputs": [], - "source": [ - "# @title #安装aria2\n", - "!apt -y install -qq aria2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UG3XpUwEomUz" - }, - "outputs": [], - "source": [ - "# @title 下载底模\n", - "\n", - "# v1\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n", - "\n", - "# v2\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D40k.pth\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D48k.pth\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G40k.pth\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G48k.pth\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D40k.pth\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D48k.pth\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G32k.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G40k.pth\n", - "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G48k.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HugjmZqZRuiF" - }, - "outputs": [], - "source": [ - "# @title #下载人声分离模型\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2RCaT9FTR0ej" - }, - "outputs": [], - "source": [ - "# @title #下载hubert_base\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# @title #下载rmvpe模型\n", - "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o rmvpe.pt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Mwk7Q0Loqzjx" - }, - "outputs": [], - "source": [ - "# @title #从谷歌云盘加载打包好的数据集到/content/dataset\n", - "\n", - "# @markdown 数据集位置\n", - "DATASET = (\n", - " \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" # @param {type:\"string\"}\n", - ")\n", - "\n", - "!mkdir -p /content/dataset\n", - "!unzip -d /content/dataset -B {DATASET}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PDlFxWHWEynD" - }, - "outputs": [], - "source": [ - "# @title #重命名数据集中的重名文件\n", - "!ls -a /content/dataset/\n", - "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7vh6vphDwO0b" - }, - "outputs": [], - "source": [ - "# @title #启动webui\n", - "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", - "# %load_ext tensorboard\n", - "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", - "!python3 infer-web.py --colab --pycmd python3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FgJuNeAwx5Y_" - }, - "outputs": [], - "source": [ - "# @title #手动将训练后的模型文件备份到谷歌云盘\n", - "# @markdown #需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", - "\n", - "# @markdown #模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown #模型epoch\n", - "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", - "\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n", - "\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OVQoLQJXS7WX" - }, - "outputs": [], - "source": [ - "# @title 从谷歌云盘恢复pth\n", - "# @markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", - "\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 模型epoch\n", - "MODELEPOCH = 7500 # @param {type:\"integer\"}\n", - "\n", - "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", - "\n", - "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", - "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", - "!cp /content/drive/MyDrive/*.index /content/\n", - "!cp /content/drive/MyDrive/*.npy /content/\n", - "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZKAyuKb9J6dz" - }, - "outputs": [], - "source": [ - "# @title 手动预处理(不推荐)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 采样率\n", - "BITRATE = 48000 # @param {type:\"integer\"}\n", - "# @markdown 使用的进程数\n", - "THREADCOUNT = 8 # @param {type:\"integer\"}\n", - "\n", - "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CrxJqzAUKmPJ" - }, - "outputs": [], - "source": [ - "# @title 手动提取特征(不推荐)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 使用的进程数\n", - "THREADCOUNT = 8 # @param {type:\"integer\"}\n", - "# @markdown 音高提取算法\n", - "ALGO = \"harvest\" # @param {type:\"string\"}\n", - "\n", - "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", - "\n", - "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IMLPLKOaKj58" - }, - "outputs": [], - "source": [ - "# @title 手动训练(不推荐)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 使用的GPU\n", - "USEGPU = \"0\" # @param {type:\"string\"}\n", - "# @markdown 批大小\n", - "BATCHSIZE = 32 # @param {type:\"integer\"}\n", - "# @markdown 停止的epoch\n", - "MODELEPOCH = 3200 # @param {type:\"integer\"}\n", - "# @markdown 保存epoch间隔\n", - "EPOCHSAVE = 100 # @param {type:\"integer\"}\n", - "# @markdown 采样率\n", - "MODELSAMPLE = \"48k\" # @param {type:\"string\"}\n", - "# @markdown 是否缓存训练集\n", - "CACHEDATA = 1 # @param {type:\"integer\"}\n", - "# @markdown 是否仅保存最新的ckpt文件\n", - "ONLYLATEST = 0 # @param {type:\"integer\"}\n", - "\n", - "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "haYA81hySuDl" - }, - "outputs": [], - "source": [ - "# @title 删除其它pth,只留选中的(慎点,仔细看代码)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 选中模型epoch\n", - "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", - "\n", - "!echo \"备份选中的模型。。。\"\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", - "\n", - "!echo \"正在删除。。。\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", - "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n", - "\n", - "!echo \"恢复选中的模型。。。\"\n", - "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", - "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", - "\n", - "!echo \"删除完成\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QhSiPTVPoIRh" - }, - "outputs": [], - "source": [ - "# @title 清除项目下所有文件,只留选中的模型(慎点,仔细看代码)\n", - "# @markdown 模型名\n", - "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", - "# @markdown 选中模型epoch\n", - "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", - "\n", - "!echo \"备份选中的模型。。。\"\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", - "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", - "\n", - "!echo \"正在删除。。。\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", - "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n", - "\n", - "!echo \"恢复选中的模型。。。\"\n", - "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", - "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", - "\n", - "!echo \"删除完成\"\n", - "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "private_outputs": true, - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/api_231006.py b/api_231006.py deleted file mode 100644 index 56e26e2..0000000 --- a/api_231006.py +++ /dev/null @@ -1,440 +0,0 @@ -#api for 231006 release version by Xiaokai -import os -import sys -import json -import re -import time -import librosa -import torch -import numpy as np -import torch.nn.functional as F -import torchaudio.transforms as tat -import sounddevice as sd -from dotenv import load_dotenv -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -import threading -import uvicorn -import logging - -# Initialize the logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# Define FastAPI app -app = FastAPI() - -class GUIConfig: - def __init__(self) -> None: - self.pth_path: str = "" - self.index_path: str = "" - self.pitch: int = 0 - self.samplerate: int = 40000 - self.block_time: float = 1.0 # s - self.buffer_num: int = 1 - self.threhold: int = -60 - self.crossfade_time: float = 0.05 - self.extra_time: float = 2.5 - self.I_noise_reduce = False - self.O_noise_reduce = False - self.rms_mix_rate = 0.0 - self.index_rate = 0.3 - self.f0method = "rmvpe" - self.sg_input_device = "" - self.sg_output_device = "" - -class ConfigData(BaseModel): - pth_path: str - index_path: str - sg_input_device: str - sg_output_device: str - threhold: int = -60 - pitch: int = 0 - index_rate: float = 0.3 - rms_mix_rate: float = 0.0 - block_time: float = 0.25 - crossfade_length: float = 0.05 - extra_time: float = 2.5 - n_cpu: int = 4 - I_noise_reduce: bool = False - O_noise_reduce: bool = False - -class AudioAPI: - def __init__(self) -> None: - self.gui_config = GUIConfig() - self.config = None # Initialize Config object as None - self.flag_vc = False - self.function = "vc" - self.delay_time = 0 - self.rvc = None # Initialize RVC object as None - - def load(self): - input_devices, output_devices, _, _ = self.get_devices() - try: - with open("configs/config.json", "r", encoding='utf-8') as j: - data = json.load(j) - data["rmvpe"] = True # Ensure rmvpe is the only f0method - if data["sg_input_device"] not in input_devices: - data["sg_input_device"] = input_devices[sd.default.device[0]] - if data["sg_output_device"] not in output_devices: - data["sg_output_device"] = output_devices[sd.default.device[1]] - except Exception as e: - logger.error(f"Failed to load configuration: {e}") - with open("configs/config.json", "w", encoding='utf-8') as j: - data = { - "pth_path": " ", - "index_path": " ", - "sg_input_device": input_devices[sd.default.device[0]], - "sg_output_device": output_devices[sd.default.device[1]], - "threhold": "-60", - "pitch": "0", - "index_rate": "0", - "rms_mix_rate": "0", - "block_time": "0.25", - "crossfade_length": "0.05", - "extra_time": "2.5", - "f0method": "rmvpe", - "use_jit": False, - } - data["rmvpe"] = True # Ensure rmvpe is the only f0method - json.dump(data, j, ensure_ascii=False) - return data - - def set_values(self, values): - logger.info(f"Setting values: {values}") - if not values.pth_path.strip(): - raise HTTPException(status_code=400, detail="Please select a .pth file") - if not values.index_path.strip(): - raise HTTPException(status_code=400, detail="Please select an index file") - self.set_devices(values.sg_input_device, values.sg_output_device) - self.config.use_jit = False - self.gui_config.pth_path = values.pth_path - self.gui_config.index_path = values.index_path - self.gui_config.threhold = values.threhold - self.gui_config.pitch = values.pitch - self.gui_config.block_time = values.block_time - self.gui_config.crossfade_time = values.crossfade_length - self.gui_config.extra_time = values.extra_time - self.gui_config.I_noise_reduce = values.I_noise_reduce - self.gui_config.O_noise_reduce = values.O_noise_reduce - self.gui_config.rms_mix_rate = values.rms_mix_rate - self.gui_config.index_rate = values.index_rate - self.gui_config.n_cpu = values.n_cpu - self.gui_config.f0method = "rmvpe" - return True - - def start_vc(self): - torch.cuda.empty_cache() - self.flag_vc = True - self.rvc = rvc_for_realtime.RVC( - self.gui_config.pitch, - self.gui_config.pth_path, - self.gui_config.index_path, - self.gui_config.index_rate, - 0, - 0, - 0, - self.config, - self.rvc if self.rvc else None, - ) - self.gui_config.samplerate = self.rvc.tgt_sr - self.zc = self.rvc.tgt_sr // 100 - self.block_frame = ( - int( - np.round( - self.gui_config.block_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.block_frame_16k = 160 * self.block_frame // self.zc - self.crossfade_frame = ( - int( - np.round( - self.gui_config.crossfade_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.sola_search_frame = self.zc - self.extra_frame = ( - int( - np.round( - self.gui_config.extra_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.input_wav = torch.zeros( - self.extra_frame + self.crossfade_frame + self.sola_search_frame + self.block_frame, - device=self.config.device, - dtype=torch.float32, - ) - self.input_wav_res = torch.zeros( - 160 * self.input_wav.shape[0] // self.zc, - device=self.config.device, - dtype=torch.float32, - ) - self.pitch = np.zeros(self.input_wav.shape[0] // self.zc, dtype="int32") - self.pitchf = np.zeros(self.input_wav.shape[0] // self.zc, dtype="float64") - self.sola_buffer = torch.zeros(self.crossfade_frame, device=self.config.device, dtype=torch.float32) - self.nr_buffer = self.sola_buffer.clone() - self.output_buffer = self.input_wav.clone() - self.res_buffer = torch.zeros(2 * self.zc, device=self.config.device, dtype=torch.float32) - self.valid_rate = 1 - (self.extra_frame - 1) / self.input_wav.shape[0] - self.fade_in_window = ( - torch.sin(0.5 * np.pi * torch.linspace(0.0, 1.0, steps=self.crossfade_frame, device=self.config.device, dtype=torch.float32)) ** 2 - ) - self.fade_out_window = 1 - self.fade_in_window - self.resampler = tat.Resample( - orig_freq=self.gui_config.samplerate, - new_freq=16000, - dtype=torch.float32, - ).to(self.config.device) - self.tg = TorchGate( - sr=self.gui_config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9 - ).to(self.config.device) - thread_vc = threading.Thread(target=self.soundinput) - thread_vc.start() - - def soundinput(self): - channels = 1 if sys.platform == "darwin" else 2 - with sd.Stream( - channels=channels, - callback=self.audio_callback, - blocksize=self.block_frame, - samplerate=self.gui_config.samplerate, - dtype="float32", - ) as stream: - global stream_latency - stream_latency = stream.latency[-1] - while self.flag_vc: - time.sleep(self.gui_config.block_time) - logger.info("Audio block passed.") - logger.info("Ending VC") - - def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status): - start_time = time.perf_counter() - indata = librosa.to_mono(indata.T) - if self.gui_config.threhold > -60: - rms = librosa.feature.rms(y=indata, frame_length=4 * self.zc, hop_length=self.zc) - db_threhold = (librosa.amplitude_to_db(rms, ref=1.0)[0] < self.gui_config.threhold) - for i in range(db_threhold.shape[0]): - if db_threhold[i]: - indata[i * self.zc : (i + 1) * self.zc] = 0 - self.input_wav[: -self.block_frame] = self.input_wav[self.block_frame :].clone() - self.input_wav[-self.block_frame :] = torch.from_numpy(indata).to(self.config.device) - self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[self.block_frame_16k :].clone() - if self.gui_config.I_noise_reduce and self.function == "vc": - input_wav = self.input_wav[-self.crossfade_frame - self.block_frame - 2 * self.zc :] - input_wav = self.tg(input_wav.unsqueeze(0), self.input_wav.unsqueeze(0))[0, 2 * self.zc :] - input_wav[: self.crossfade_frame] *= self.fade_in_window - input_wav[: self.crossfade_frame] += self.nr_buffer * self.fade_out_window - self.nr_buffer[:] = input_wav[-self.crossfade_frame :] - input_wav = torch.cat((self.res_buffer[:], input_wav[: self.block_frame])) - self.res_buffer[:] = input_wav[-2 * self.zc :] - self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(input_wav)[160:] - else: - self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(self.input_wav[-self.block_frame - 2 * self.zc :])[160:] - if self.function == "vc": - f0_extractor_frame = self.block_frame_16k + 800 - if self.gui_config.f0method == "rmvpe": - f0_extractor_frame = (5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160) - infer_wav = self.rvc.infer( - self.input_wav_res, - self.input_wav_res[-f0_extractor_frame:].cpu().numpy(), - self.block_frame_16k, - self.valid_rate, - self.pitch, - self.pitchf, - self.gui_config.f0method, - ) - infer_wav = infer_wav[-self.crossfade_frame - self.sola_search_frame - self.block_frame :] - else: - infer_wav = self.input_wav[-self.crossfade_frame - self.sola_search_frame - self.block_frame :].clone() - if (self.gui_config.O_noise_reduce and self.function == "vc") or (self.gui_config.I_noise_reduce and self.function == "im"): - self.output_buffer[: -self.block_frame] = self.output_buffer[self.block_frame :].clone() - self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :] - infer_wav = self.tg(infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)).squeeze(0) - if self.gui_config.rms_mix_rate < 1 and self.function == "vc": - rms1 = librosa.feature.rms(y=self.input_wav_res[-160 * infer_wav.shape[0] // self.zc :].cpu().numpy(), frame_length=640, hop_length=160) - rms1 = torch.from_numpy(rms1).to(self.config.device) - rms1 = F.interpolate(rms1.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear", align_corners=True)[0, 0, :-1] - rms2 = librosa.feature.rms(y=infer_wav[:].cpu().numpy(), frame_length=4 * self.zc, hop_length=self.zc) - rms2 = torch.from_numpy(rms2).to(self.config.device) - rms2 = F.interpolate(rms2.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear", align_corners=True)[0, 0, :-1] - rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3) - infer_wav *= torch.pow(rms1 / rms2, torch.tensor(1 - self.gui_config.rms_mix_rate)) - conv_input = infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame] - cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :]) - cor_den = torch.sqrt(F.conv1d(conv_input**2, torch.ones(1, 1, self.crossfade_frame, device=self.config.device)) + 1e-8) - if sys.platform == "darwin": - _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0]) - sola_offset = sola_offset.item() - else: - sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0]) - logger.info(f"sola_offset = {sola_offset}") - infer_wav = infer_wav[sola_offset : sola_offset + self.block_frame + self.crossfade_frame] - infer_wav[: self.crossfade_frame] *= self.fade_in_window - infer_wav[: self.crossfade_frame] += self.sola_buffer * self.fade_out_window - self.sola_buffer[:] = infer_wav[-self.crossfade_frame :] - if sys.platform == "darwin": - outdata[:] = infer_wav[: -self.crossfade_frame].cpu().numpy()[:, np.newaxis] - else: - outdata[:] = infer_wav[: -self.crossfade_frame].repeat(2, 1).t().cpu().numpy() - total_time = time.perf_counter() - start_time - logger.info(f"Infer time: {total_time:.2f}") - - def get_devices(self, update: bool = True): - if update: - sd._terminate() - sd._initialize() - devices = sd.query_devices() - hostapis = sd.query_hostapis() - for hostapi in hostapis: - for device_idx in hostapi["devices"]: - devices[device_idx]["hostapi_name"] = hostapi["name"] - input_devices = [ - f"{d['name']} ({d['hostapi_name']})" - for d in devices - if d["max_input_channels"] > 0 - ] - output_devices = [ - f"{d['name']} ({d['hostapi_name']})" - for d in devices - if d["max_output_channels"] > 0 - ] - input_devices_indices = [ - d["index"] if "index" in d else d["name"] - for d in devices - if d["max_input_channels"] > 0 - ] - output_devices_indices = [ - d["index"] if "index" in d else d["name"] - for d in devices - if d["max_output_channels"] > 0 - ] - return ( - input_devices, - output_devices, - input_devices_indices, - output_devices_indices, - ) - - def set_devices(self, input_device, output_device): - ( - input_devices, - output_devices, - input_device_indices, - output_device_indices, - ) = self.get_devices() - logger.debug(f"Available input devices: {input_devices}") - logger.debug(f"Available output devices: {output_devices}") - logger.debug(f"Selected input device: {input_device}") - logger.debug(f"Selected output device: {output_device}") - - if input_device not in input_devices: - logger.error(f"Input device '{input_device}' is not in the list of available devices") - raise HTTPException(status_code=400, detail=f"Input device '{input_device}' is not available") - - if output_device not in output_devices: - logger.error(f"Output device '{output_device}' is not in the list of available devices") - raise HTTPException(status_code=400, detail=f"Output device '{output_device}' is not available") - - sd.default.device[0] = input_device_indices[input_devices.index(input_device)] - sd.default.device[1] = output_device_indices[output_devices.index(output_device)] - logger.info(f"Input device set to {sd.default.device[0]}: {input_device}") - logger.info(f"Output device set to {sd.default.device[1]}: {output_device}") - -audio_api = AudioAPI() - -@app.get("/inputDevices", response_model=list) -def get_input_devices(): - try: - input_devices, _, _, _ = audio_api.get_devices() - return input_devices - except Exception as e: - logger.error(f"Failed to get input devices: {e}") - raise HTTPException(status_code=500, detail="Failed to get input devices") - -@app.get("/outputDevices", response_model=list) -def get_output_devices(): - try: - _, output_devices, _, _ = audio_api.get_devices() - return output_devices - except Exception as e: - logger.error(f"Failed to get output devices: {e}") - raise HTTPException(status_code=500, detail="Failed to get output devices") - -@app.post("/config") -def configure_audio(config_data: ConfigData): - try: - logger.info(f"Configuring audio with data: {config_data}") - if audio_api.set_values(config_data): - settings = config_data.dict() - settings["use_jit"] = False - settings["f0method"] = "rmvpe" - with open("configs/config.json", "w", encoding='utf-8') as j: - json.dump(settings, j, ensure_ascii=False) - logger.info("Configuration set successfully") - return {"message": "Configuration set successfully"} - except HTTPException as e: - logger.error(f"Configuration error: {e.detail}") - raise - except Exception as e: - logger.error(f"Configuration failed: {e}") - raise HTTPException(status_code=400, detail=f"Configuration failed: {e}") - -@app.post("/start") -def start_conversion(): - try: - if not audio_api.flag_vc: - audio_api.start_vc() - return {"message": "Audio conversion started"} - else: - logger.warning("Audio conversion already running") - raise HTTPException(status_code=400, detail="Audio conversion already running") - except HTTPException as e: - logger.error(f"Start conversion error: {e.detail}") - raise - except Exception as e: - logger.error(f"Failed to start conversion: {e}") - raise HTTPException(status_code=500, detail=f"Failed to start conversion: {e}") - -@app.post("/stop") -def stop_conversion(): - try: - if audio_api.flag_vc: - audio_api.flag_vc = False - global stream_latency - stream_latency = -1 - return {"message": "Audio conversion stopped"} - else: - logger.warning("Audio conversion not running") - raise HTTPException(status_code=400, detail="Audio conversion not running") - except HTTPException as e: - logger.error(f"Stop conversion error: {e.detail}") - raise - except Exception as e: - logger.error(f"Failed to stop conversion: {e}") - raise HTTPException(status_code=500, detail=f"Failed to stop conversion: {e}") - -if __name__ == "__main__": - if sys.platform == "win32": - from multiprocessing import freeze_support - freeze_support() - load_dotenv() - os.environ["OMP_NUM_THREADS"] = "4" - if sys.platform == "darwin": - os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" - from tools.torchgate import TorchGate - import tools.rvc_for_realtime as rvc_for_realtime - from configs.config import Config - audio_api.config = Config() - uvicorn.run(app, host="0.0.0.0", port=6242) diff --git a/api_240604.py b/api_240604.py deleted file mode 100644 index 08227ce..0000000 --- a/api_240604.py +++ /dev/null @@ -1,565 +0,0 @@ -#api for 240604 release version by Xiaokai -import os -import sys -import json -import re -import time -import librosa -import torch -import numpy as np -import torch.nn.functional as F -import torchaudio.transforms as tat -import sounddevice as sd -from dotenv import load_dotenv -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -import threading -import uvicorn -import logging -from multiprocessing import Queue, Process, cpu_count, freeze_support - -# Initialize the logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# Define FastAPI app -app = FastAPI() - -class GUIConfig: - def __init__(self) -> None: - self.pth_path: str = "" - self.index_path: str = "" - self.pitch: int = 0 - self.formant: float = 0.0 - self.sr_type: str = "sr_model" - self.block_time: float = 0.25 # s - self.threhold: int = -60 - self.crossfade_time: float = 0.05 - self.extra_time: float = 2.5 - self.I_noise_reduce: bool = False - self.O_noise_reduce: bool = False - self.use_pv: bool = False - self.rms_mix_rate: float = 0.0 - self.index_rate: float = 0.0 - self.n_cpu: int = 4 - self.f0method: str = "fcpe" - self.sg_input_device: str = "" - self.sg_output_device: str = "" - -class ConfigData(BaseModel): - pth_path: str - index_path: str - sg_input_device: str - sg_output_device: str - threhold: int = -60 - pitch: int = 0 - formant: float = 0.0 - index_rate: float = 0.3 - rms_mix_rate: float = 0.0 - block_time: float = 0.25 - crossfade_length: float = 0.05 - extra_time: float = 2.5 - n_cpu: int = 4 - I_noise_reduce: bool = False - O_noise_reduce: bool = False - use_pv: bool = False - f0method: str = "fcpe" - -class Harvest(Process): - def __init__(self, inp_q, opt_q): - super(Harvest, self).__init__() - self.inp_q = inp_q - self.opt_q = opt_q - - def run(self): - import numpy as np - import pyworld - while True: - idx, x, res_f0, n_cpu, ts = self.inp_q.get() - f0, t = pyworld.harvest( - x.astype(np.double), - fs=16000, - f0_ceil=1100, - f0_floor=50, - frame_period=10, - ) - res_f0[idx] = f0 - if len(res_f0.keys()) >= n_cpu: - self.opt_q.put(ts) - -class AudioAPI: - def __init__(self) -> None: - self.gui_config = GUIConfig() - self.config = None # Initialize Config object as None - self.flag_vc = False - self.function = "vc" - self.delay_time = 0 - self.rvc = None # Initialize RVC object as None - self.inp_q = None - self.opt_q = None - self.n_cpu = min(cpu_count(), 8) - - def initialize_queues(self): - self.inp_q = Queue() - self.opt_q = Queue() - for _ in range(self.n_cpu): - p = Harvest(self.inp_q, self.opt_q) - p.daemon = True - p.start() - - def load(self): - input_devices, output_devices, _, _ = self.get_devices() - try: - with open("configs/config.json", "r", encoding='utf-8') as j: - data = json.load(j) - if data["sg_input_device"] not in input_devices: - data["sg_input_device"] = input_devices[sd.default.device[0]] - if data["sg_output_device"] not in output_devices: - data["sg_output_device"] = output_devices[sd.default.device[1]] - except Exception as e: - logger.error(f"Failed to load configuration: {e}") - with open("configs/config.json", "w", encoding='utf-8') as j: - data = { - "pth_path": "", - "index_path": "", - "sg_input_device": input_devices[sd.default.device[0]], - "sg_output_device": output_devices[sd.default.device[1]], - "threhold": -60, - "pitch": 0, - "formant": 0.0, - "index_rate": 0, - "rms_mix_rate": 0, - "block_time": 0.25, - "crossfade_length": 0.05, - "extra_time": 2.5, - "n_cpu": 4, - "f0method": "fcpe", - "use_jit": False, - "use_pv": False, - } - json.dump(data, j, ensure_ascii=False) - return data - - def set_values(self, values): - logger.info(f"Setting values: {values}") - if not values.pth_path.strip(): - raise HTTPException(status_code=400, detail="Please select a .pth file") - if not values.index_path.strip(): - raise HTTPException(status_code=400, detail="Please select an index file") - self.set_devices(values.sg_input_device, values.sg_output_device) - self.config.use_jit = False - self.gui_config.pth_path = values.pth_path - self.gui_config.index_path = values.index_path - self.gui_config.threhold = values.threhold - self.gui_config.pitch = values.pitch - self.gui_config.formant = values.formant - self.gui_config.block_time = values.block_time - self.gui_config.crossfade_time = values.crossfade_length - self.gui_config.extra_time = values.extra_time - self.gui_config.I_noise_reduce = values.I_noise_reduce - self.gui_config.O_noise_reduce = values.O_noise_reduce - self.gui_config.rms_mix_rate = values.rms_mix_rate - self.gui_config.index_rate = values.index_rate - self.gui_config.n_cpu = values.n_cpu - self.gui_config.use_pv = values.use_pv - self.gui_config.f0method = values.f0method - return True - - def start_vc(self): - torch.cuda.empty_cache() - self.flag_vc = True - self.rvc = rvc_for_realtime.RVC( - self.gui_config.pitch, - self.gui_config.pth_path, - self.gui_config.index_path, - self.gui_config.index_rate, - self.gui_config.n_cpu, - self.inp_q, - self.opt_q, - self.config, - self.rvc if self.rvc else None, - ) - self.gui_config.samplerate = ( - self.rvc.tgt_sr - if self.gui_config.sr_type == "sr_model" - else self.get_device_samplerate() - ) - self.zc = self.gui_config.samplerate // 100 - self.block_frame = ( - int( - np.round( - self.gui_config.block_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.block_frame_16k = 160 * self.block_frame // self.zc - self.crossfade_frame = ( - int( - np.round( - self.gui_config.crossfade_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.sola_buffer_frame = min(self.crossfade_frame, 4 * self.zc) - self.sola_search_frame = self.zc - self.extra_frame = ( - int( - np.round( - self.gui_config.extra_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.input_wav = torch.zeros( - self.extra_frame - + self.crossfade_frame - + self.sola_search_frame - + self.block_frame, - device=self.config.device, - dtype=torch.float32, - ) - self.input_wav_denoise = self.input_wav.clone() - self.input_wav_res = torch.zeros( - 160 * self.input_wav.shape[0] // self.zc, - device=self.config.device, - dtype=torch.float32, - ) - self.rms_buffer = np.zeros(4 * self.zc, dtype="float32") - self.sola_buffer = torch.zeros( - self.sola_buffer_frame, device=self.config.device, dtype=torch.float32 - ) - self.nr_buffer = self.sola_buffer.clone() - self.output_buffer = self.input_wav.clone() - self.skip_head = self.extra_frame // self.zc - self.return_length = ( - self.block_frame + self.sola_buffer_frame + self.sola_search_frame - ) // self.zc - self.fade_in_window = ( - torch.sin( - 0.5 - * np.pi - * torch.linspace( - 0.0, - 1.0, - steps=self.sola_buffer_frame, - device=self.config.device, - dtype=torch.float32, - ) - ) - ** 2 - ) - self.fade_out_window = 1 - self.fade_in_window - self.resampler = tat.Resample( - orig_freq=self.gui_config.samplerate, - new_freq=16000, - dtype=torch.float32, - ).to(self.config.device) - if self.rvc.tgt_sr != self.gui_config.samplerate: - self.resampler2 = tat.Resample( - orig_freq=self.rvc.tgt_sr, - new_freq=self.gui_config.samplerate, - dtype=torch.float32, - ).to(self.config.device) - else: - self.resampler2 = None - self.tg = TorchGate( - sr=self.gui_config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9 - ).to(self.config.device) - thread_vc = threading.Thread(target=self.soundinput) - thread_vc.start() - - def soundinput(self): - channels = 1 if sys.platform == "darwin" else 2 - with sd.Stream( - channels=channels, - callback=self.audio_callback, - blocksize=self.block_frame, - samplerate=self.gui_config.samplerate, - dtype="float32", - ) as stream: - global stream_latency - stream_latency = stream.latency[-1] - while self.flag_vc: - time.sleep(self.gui_config.block_time) - logger.info("Audio block passed.") - logger.info("Ending VC") - - def audio_callback(self, indata: np.ndarray, outdata: np.ndarray, frames, times, status): - start_time = time.perf_counter() - indata = librosa.to_mono(indata.T) - if self.gui_config.threhold > -60: - indata = np.append(self.rms_buffer, indata) - rms = librosa.feature.rms(y=indata, frame_length=4 * self.zc, hop_length=self.zc)[:, 2:] - self.rms_buffer[:] = indata[-4 * self.zc :] - indata = indata[2 * self.zc - self.zc // 2 :] - db_threhold = ( - librosa.amplitude_to_db(rms, ref=1.0)[0] < self.gui_config.threhold - ) - for i in range(db_threhold.shape[0]): - if db_threhold[i]: - indata[i * self.zc : (i + 1) * self.zc] = 0 - indata = indata[self.zc // 2 :] - self.input_wav[: -self.block_frame] = self.input_wav[self.block_frame :].clone() - self.input_wav[-indata.shape[0] :] = torch.from_numpy(indata).to(self.config.device) - self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[self.block_frame_16k :].clone() - # input noise reduction and resampling - if self.gui_config.I_noise_reduce: - self.input_wav_denoise[: -self.block_frame] = self.input_wav_denoise[self.block_frame :].clone() - input_wav = self.input_wav[-self.sola_buffer_frame - self.block_frame :] - input_wav = self.tg(input_wav.unsqueeze(0), self.input_wav.unsqueeze(0)).squeeze(0) - input_wav[: self.sola_buffer_frame] *= self.fade_in_window - input_wav[: self.sola_buffer_frame] += self.nr_buffer * self.fade_out_window - self.input_wav_denoise[-self.block_frame :] = input_wav[: self.block_frame] - self.nr_buffer[:] = input_wav[self.block_frame :] - self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler( - self.input_wav_denoise[-self.block_frame - 2 * self.zc :] - )[160:] - else: - self.input_wav_res[-160 * (indata.shape[0] // self.zc + 1) :] = ( - self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[160:] - ) - # infer - if self.function == "vc": - infer_wav = self.rvc.infer( - self.input_wav_res, - self.block_frame_16k, - self.skip_head, - self.return_length, - self.gui_config.f0method, - ) - if self.resampler2 is not None: - infer_wav = self.resampler2(infer_wav) - elif self.gui_config.I_noise_reduce: - infer_wav = self.input_wav_denoise[self.extra_frame :].clone() - else: - infer_wav = self.input_wav[self.extra_frame :].clone() - # output noise reduction - if self.gui_config.O_noise_reduce and self.function == "vc": - self.output_buffer[: -self.block_frame] = self.output_buffer[self.block_frame :].clone() - self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :] - infer_wav = self.tg(infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)).squeeze(0) - # volume envelop mixing - if self.gui_config.rms_mix_rate < 1 and self.function == "vc": - if self.gui_config.I_noise_reduce: - input_wav = self.input_wav_denoise[self.extra_frame :] - else: - input_wav = self.input_wav[self.extra_frame :] - rms1 = librosa.feature.rms( - y=input_wav[: infer_wav.shape[0]].cpu().numpy(), - frame_length=4 * self.zc, - hop_length=self.zc, - ) - rms1 = torch.from_numpy(rms1).to(self.config.device) - rms1 = F.interpolate( - rms1.unsqueeze(0), - size=infer_wav.shape[0] + 1, - mode="linear", - align_corners=True, - )[0, 0, :-1] - rms2 = librosa.feature.rms( - y=infer_wav[:].cpu().numpy(), - frame_length=4 * self.zc, - hop_length=self.zc, - ) - rms2 = torch.from_numpy(rms2).to(self.config.device) - rms2 = F.interpolate( - rms2.unsqueeze(0), - size=infer_wav.shape[0] + 1, - mode="linear", - align_corners=True, - )[0, 0, :-1] - rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3) - infer_wav *= torch.pow( - rms1 / rms2, torch.tensor(1 - self.gui_config.rms_mix_rate) - ) - # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC - conv_input = infer_wav[None, None, : self.sola_buffer_frame + self.sola_search_frame] - cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :]) - cor_den = torch.sqrt( - F.conv1d( - conv_input**2, - torch.ones(1, 1, self.sola_buffer_frame, device=self.config.device), - ) - + 1e-8 - ) - if sys.platform == "darwin": - _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0]) - sola_offset = sola_offset.item() - else: - sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0]) - logger.info(f"sola_offset = {sola_offset}") - infer_wav = infer_wav[sola_offset:] - if "privateuseone" in str(self.config.device) or not self.gui_config.use_pv: - infer_wav[: self.sola_buffer_frame] *= self.fade_in_window - infer_wav[: self.sola_buffer_frame] += self.sola_buffer * self.fade_out_window - else: - infer_wav[: self.sola_buffer_frame] = phase_vocoder( - self.sola_buffer, - infer_wav[: self.sola_buffer_frame], - self.fade_out_window, - self.fade_in_window, - ) - self.sola_buffer[:] = infer_wav[ - self.block_frame : self.block_frame + self.sola_buffer_frame - ] - if sys.platform == "darwin": - outdata[:] = infer_wav[: self.block_frame].cpu().numpy()[:, np.newaxis] - else: - outdata[:] = infer_wav[: self.block_frame].repeat(2, 1).t().cpu().numpy() - total_time = time.perf_counter() - start_time - logger.info(f"Infer time: {total_time:.2f}") - - def get_devices(self, update: bool = True): - if update: - sd._terminate() - sd._initialize() - devices = sd.query_devices() - hostapis = sd.query_hostapis() - for hostapi in hostapis: - for device_idx in hostapi["devices"]: - devices[device_idx]["hostapi_name"] = hostapi["name"] - input_devices = [ - f"{d['name']} ({d['hostapi_name']})" - for d in devices - if d["max_input_channels"] > 0 - ] - output_devices = [ - f"{d['name']} ({d['hostapi_name']})" - for d in devices - if d["max_output_channels"] > 0 - ] - input_devices_indices = [ - d["index"] if "index" in d else d["name"] - for d in devices - if d["max_input_channels"] > 0 - ] - output_devices_indices = [ - d["index"] if "index" in d else d["name"] - for d in devices - if d["max_output_channels"] > 0 - ] - return ( - input_devices, - output_devices, - input_devices_indices, - output_devices_indices, - ) - - def set_devices(self, input_device, output_device): - ( - input_devices, - output_devices, - input_device_indices, - output_device_indices, - ) = self.get_devices() - logger.debug(f"Available input devices: {input_devices}") - logger.debug(f"Available output devices: {output_devices}") - logger.debug(f"Selected input device: {input_device}") - logger.debug(f"Selected output device: {output_device}") - - if input_device not in input_devices: - logger.error(f"Input device '{input_device}' is not in the list of available devices") - raise HTTPException(status_code=400, detail=f"Input device '{input_device}' is not available") - - if output_device not in output_devices: - logger.error(f"Output device '{output_device}' is not in the list of available devices") - raise HTTPException(status_code=400, detail=f"Output device '{output_device}' is not available") - - sd.default.device[0] = input_device_indices[input_devices.index(input_device)] - sd.default.device[1] = output_device_indices[output_devices.index(output_device)] - logger.info(f"Input device set to {sd.default.device[0]}: {input_device}") - logger.info(f"Output device set to {sd.default.device[1]}: {output_device}") - -audio_api = AudioAPI() - -@app.get("/inputDevices", response_model=list) -def get_input_devices(): - try: - input_devices, _, _, _ = audio_api.get_devices() - return input_devices - except Exception as e: - logger.error(f"Failed to get input devices: {e}") - raise HTTPException(status_code=500, detail="Failed to get input devices") - -@app.get("/outputDevices", response_model=list) -def get_output_devices(): - try: - _, output_devices, _, _ = audio_api.get_devices() - return output_devices - except Exception as e: - logger.error(f"Failed to get output devices: {e}") - raise HTTPException(status_code=500, detail="Failed to get output devices") - -@app.post("/config") -def configure_audio(config_data: ConfigData): - try: - logger.info(f"Configuring audio with data: {config_data}") - if audio_api.set_values(config_data): - settings = config_data.dict() - settings["use_jit"] = False - with open("configs/config.json", "w", encoding='utf-8') as j: - json.dump(settings, j, ensure_ascii=False) - logger.info("Configuration set successfully") - return {"message": "Configuration set successfully"} - except HTTPException as e: - logger.error(f"Configuration error: {e.detail}") - raise - except Exception as e: - logger.error(f"Configuration failed: {e}") - raise HTTPException(status_code=400, detail=f"Configuration failed: {e}") - -@app.post("/start") -def start_conversion(): - try: - if not audio_api.flag_vc: - audio_api.start_vc() - return {"message": "Audio conversion started"} - else: - logger.warning("Audio conversion already running") - raise HTTPException(status_code=400, detail="Audio conversion already running") - except HTTPException as e: - logger.error(f"Start conversion error: {e.detail}") - raise - except Exception as e: - logger.error(f"Failed to start conversion: {e}") - raise HTTPException(status_code=500, detail="Failed to start conversion: {e}") - -@app.post("/stop") -def stop_conversion(): - try: - if audio_api.flag_vc: - audio_api.flag_vc = False - global stream_latency - stream_latency = -1 - return {"message": "Audio conversion stopped"} - else: - logger.warning("Audio conversion not running") - raise HTTPException(status_code=400, detail="Audio conversion not running") - except HTTPException as e: - logger.error(f"Stop conversion error: {e.detail}") - raise - except Exception as e: - logger.error(f"Failed to stop conversion: {e}") - raise HTTPException(status_code=500, detail="Failed to stop conversion: {e}") - -if __name__ == "__main__": - if sys.platform == "win32": - freeze_support() - load_dotenv() - os.environ["OMP_NUM_THREADS"] = "4" - if sys.platform == "darwin": - os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" - from tools.torchgate import TorchGate - import tools.rvc_for_realtime as rvc_for_realtime - from configs.config import Config - audio_api.config = Config() - audio_api.initialize_queues() - uvicorn.run(app, host="0.0.0.0", port=6242) diff --git a/configure_gpu_deps.py b/configure_gpu_deps.py index 8cf36d4..ec326be 100644 --- a/configure_gpu_deps.py +++ b/configure_gpu_deps.py @@ -65,13 +65,15 @@ def process_lines(lines, target_gpu): return output_lines def main(): - if len(sys.argv) != 3: + if len(sys.argv) != 2: print("Usage: python configure_gpu_deps.py ") print(" where is either 'nvidia' or 'amd'") sys.exit(1) - toml_path = sys.argv[1] - gpu_type = sys.argv[2].lower() + gpu_type = sys.argv[1].lower() + toml_path = "pyproject.toml" + with open(toml_path, "r", encoding="utf-8") as f: + lines = f.readlines() if gpu_type not in {"nvidia", "amd"}: print("gpu_type must be either 'nvidia' or 'amd'") sys.exit(1) diff --git a/environment_dml.yaml b/environment_dml.yaml deleted file mode 100644 index 0fb3f22..0000000 --- a/environment_dml.yaml +++ /dev/null @@ -1,186 +0,0 @@ -name: pydml -channels: - - pytorch - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ - - defaults - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/fastai/ - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/ -dependencies: - - abseil-cpp=20211102.0=hd77b12b_0 - - absl-py=1.3.0=py310haa95532_0 - - aiohttp=3.8.3=py310h2bbff1b_0 - - aiosignal=1.2.0=pyhd3eb1b0_0 - - async-timeout=4.0.2=py310haa95532_0 - - attrs=22.1.0=py310haa95532_0 - - blas=1.0=mkl - - blinker=1.4=py310haa95532_0 - - bottleneck=1.3.5=py310h9128911_0 - - brotli=1.0.9=h2bbff1b_7 - - brotli-bin=1.0.9=h2bbff1b_7 - - brotlipy=0.7.0=py310h2bbff1b_1002 - - bzip2=1.0.8=he774522_0 - - c-ares=1.19.0=h2bbff1b_0 - - ca-certificates=2023.05.30=haa95532_0 - - cachetools=4.2.2=pyhd3eb1b0_0 - - certifi=2023.5.7=py310haa95532_0 - - cffi=1.15.1=py310h2bbff1b_3 - - charset-normalizer=2.0.4=pyhd3eb1b0_0 - - click=8.0.4=py310haa95532_0 - - colorama=0.4.6=py310haa95532_0 - - contourpy=1.0.5=py310h59b6b97_0 - - cryptography=39.0.1=py310h21b164f_0 - - cycler=0.11.0=pyhd3eb1b0_0 - - fonttools=4.25.0=pyhd3eb1b0_0 - - freetype=2.12.1=ha860e81_0 - - frozenlist=1.3.3=py310h2bbff1b_0 - - giflib=5.2.1=h8cc25b3_3 - - glib=2.69.1=h5dc1a3c_2 - - google-auth=2.6.0=pyhd3eb1b0_0 - - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0 - - grpc-cpp=1.48.2=hf108199_0 - - grpcio=1.48.2=py310hf108199_0 - - gst-plugins-base=1.18.5=h9e645db_0 - - gstreamer=1.18.5=hd78058f_0 - - icu=58.2=ha925a31_3 - - idna=3.4=py310haa95532_0 - - intel-openmp=2023.1.0=h59b6b97_46319 - - jpeg=9e=h2bbff1b_1 - - kiwisolver=1.4.4=py310hd77b12b_0 - - krb5=1.19.4=h5b6d351_0 - - lerc=3.0=hd77b12b_0 - - libbrotlicommon=1.0.9=h2bbff1b_7 - - libbrotlidec=1.0.9=h2bbff1b_7 - - libbrotlienc=1.0.9=h2bbff1b_7 - - libclang=14.0.6=default_hb5a9fac_1 - - libclang13=14.0.6=default_h8e68704_1 - - libdeflate=1.17=h2bbff1b_0 - - libffi=3.4.4=hd77b12b_0 - - libiconv=1.16=h2bbff1b_2 - - libogg=1.3.5=h2bbff1b_1 - - libpng=1.6.39=h8cc25b3_0 - - libprotobuf=3.20.3=h23ce68f_0 - - libtiff=4.5.0=h6c2663c_2 - - libuv=1.44.2=h2bbff1b_0 - - libvorbis=1.3.7=he774522_0 - - libwebp=1.2.4=hbc33d0d_1 - - libwebp-base=1.2.4=h2bbff1b_1 - - libxml2=2.10.3=h0ad7f3c_0 - - libxslt=1.1.37=h2bbff1b_0 - - lz4-c=1.9.4=h2bbff1b_0 - - markdown=3.4.1=py310haa95532_0 - - markupsafe=2.1.1=py310h2bbff1b_0 - - matplotlib=3.7.1=py310haa95532_1 - - matplotlib-base=3.7.1=py310h4ed8f06_1 - - mkl=2023.1.0=h8bd8f75_46356 - - mkl-service=2.4.0=py310h2bbff1b_1 - - mkl_fft=1.3.6=py310h4ed8f06_1 - - mkl_random=1.2.2=py310h4ed8f06_1 - - multidict=6.0.2=py310h2bbff1b_0 - - munkres=1.1.4=py_0 - - numexpr=2.8.4=py310h2cd9be0_1 - - numpy=1.24.3=py310h055cbcc_1 - - numpy-base=1.24.3=py310h65a83cf_1 - - oauthlib=3.2.2=py310haa95532_0 - - openssl=1.1.1t=h2bbff1b_0 - - packaging=23.0=py310haa95532_0 - - pandas=1.5.3=py310h4ed8f06_0 - - pcre=8.45=hd77b12b_0 - - pillow=9.4.0=py310hd77b12b_0 - - pip=23.0.1=py310haa95532_0 - - ply=3.11=py310haa95532_0 - - protobuf=3.20.3=py310hd77b12b_0 - - pyasn1=0.4.8=pyhd3eb1b0_0 - - pyasn1-modules=0.2.8=py_0 - - pycparser=2.21=pyhd3eb1b0_0 - - pyjwt=2.4.0=py310haa95532_0 - - pyopenssl=23.0.0=py310haa95532_0 - - pyparsing=3.0.9=py310haa95532_0 - - pyqt=5.15.7=py310hd77b12b_0 - - pyqt5-sip=12.11.0=py310hd77b12b_0 - - pysocks=1.7.1=py310haa95532_0 - - python=3.10.11=h966fe2a_2 - - python-dateutil=2.8.2=pyhd3eb1b0_0 - - pytorch-mutex=1.0=cpu - - pytz=2022.7=py310haa95532_0 - - pyyaml=6.0=py310h2bbff1b_1 - - qt-main=5.15.2=he8e5bd7_8 - - qt-webengine=5.15.9=hb9a9bb5_5 - - qtwebkit=5.212=h2bbfb41_5 - - re2=2022.04.01=hd77b12b_0 - - requests=2.29.0=py310haa95532_0 - - requests-oauthlib=1.3.0=py_0 - - rsa=4.7.2=pyhd3eb1b0_1 - - setuptools=67.8.0=py310haa95532_0 - - sip=6.6.2=py310hd77b12b_0 - - six=1.16.0=pyhd3eb1b0_1 - - sqlite=3.41.2=h2bbff1b_0 - - tbb=2021.8.0=h59b6b97_0 - - tensorboard=2.10.0=py310haa95532_0 - - tensorboard-data-server=0.6.1=py310haa95532_0 - - tensorboard-plugin-wit=1.8.1=py310haa95532_0 - - tk=8.6.12=h2bbff1b_0 - - toml=0.10.2=pyhd3eb1b0_0 - - tornado=6.2=py310h2bbff1b_0 - - tqdm=4.65.0=py310h9909e9c_0 - - typing_extensions=4.5.0=py310haa95532_0 - - tzdata=2023c=h04d1e81_0 - - urllib3=1.26.16=py310haa95532_0 - - vc=14.2=h21ff451_1 - - vs2015_runtime=14.27.29016=h5e58377_2 - - werkzeug=2.2.3=py310haa95532_0 - - wheel=0.38.4=py310haa95532_0 - - win_inet_pton=1.1.0=py310haa95532_0 - - xz=5.4.2=h8cc25b3_0 - - yaml=0.2.5=he774522_0 - - yarl=1.8.1=py310h2bbff1b_0 - - zlib=1.2.13=h8cc25b3_0 - - zstd=1.5.5=hd43e919_0 - - pip: - - antlr4-python3-runtime==4.8 - - appdirs==1.4.4 - - audioread==3.0.0 - - bitarray==2.7.4 - - cython==0.29.35 - - decorator==5.1.1 - - fairseq==0.12.2 - - faiss-cpu==1.7.4 - - filelock==3.12.0 - - hydra-core==1.0.7 - - jinja2==3.1.2 - - joblib==1.2.0 - - lazy-loader==0.2 - - librosa==0.10.0.post2 - - llvmlite==0.40.0 - - lxml==4.9.2 - - mpmath==1.3.0 - - msgpack==1.0.5 - - networkx==3.1 - - noisereduce==2.0.1 - - numba==0.57.0 - - omegaconf==2.0.6 - - opencv-python==4.7.0.72 - - pooch==1.6.0 - - portalocker==2.7.0 - - pysimplegui==4.60.5 - - pywin32==306 - - pyworld==0.3.3 - - regex==2023.5.5 - - sacrebleu==2.3.1 - - scikit-learn==1.2.2 - - scipy==1.10.1 - - sounddevice==0.4.6 - - soundfile==0.12.1 - - soxr==0.3.5 - - sympy==1.12 - - tabulate==0.9.0 - - threadpoolctl==3.1.0 - - torch==2.0.0 - - torch-directml==0.2.0.dev230426 - - torchaudio==2.0.1 - - torchvision==0.15.1 - - wget==3.2 -prefix: D:\ProgramData\anaconda3_\envs\pydml diff --git a/go-realtime-gui-dml.bat b/go-realtime-gui-dml.bat deleted file mode 100644 index 2c87633..0000000 --- a/go-realtime-gui-dml.bat +++ /dev/null @@ -1,2 +0,0 @@ -runtime\python.exe gui_v1.py --pycmd runtime\python.exe --dml -pause diff --git a/go-realtime-gui.bat b/go-realtime-gui.bat deleted file mode 100644 index 8c08290..0000000 --- a/go-realtime-gui.bat +++ /dev/null @@ -1,2 +0,0 @@ -runtime\python.exe gui_v1.py -pause diff --git a/go-web-dml.bat b/go-web-dml.bat deleted file mode 100644 index fc5e708..0000000 --- a/go-web-dml.bat +++ /dev/null @@ -1,2 +0,0 @@ -runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897 --dml -pause diff --git a/go-web.bat b/go-web.bat deleted file mode 100644 index db1dec5..0000000 --- a/go-web.bat +++ /dev/null @@ -1,2 +0,0 @@ -runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897 -pause diff --git a/gui_v1.py b/gui_v1.py deleted file mode 100644 index c5e7179..0000000 --- a/gui_v1.py +++ /dev/null @@ -1,1070 +0,0 @@ -import os -import sys -from dotenv import load_dotenv -import shutil - -load_dotenv() - -os.environ["OMP_NUM_THREADS"] = "4" -if sys.platform == "darwin": - os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" - -now_dir = os.getcwd() -sys.path.append(now_dir) -import multiprocessing - -flag_vc = False - - -def printt(strr, *args): - if len(args) == 0: - print(strr) - else: - print(strr % args) - - -def phase_vocoder(a, b, fade_out, fade_in): - window = torch.sqrt(fade_out * fade_in) - fa = torch.fft.rfft(a * window) - fb = torch.fft.rfft(b * window) - absab = torch.abs(fa) + torch.abs(fb) - n = a.shape[0] - if n % 2 == 0: - absab[1:-1] *= 2 - else: - absab[1:] *= 2 - phia = torch.angle(fa) - phib = torch.angle(fb) - deltaphase = phib - phia - deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5) - w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase - t = torch.arange(n).unsqueeze(-1).to(a) / n - result = ( - a * (fade_out**2) - + b * (fade_in**2) - + torch.sum(absab * torch.cos(w * t + phia), -1) * window / n - ) - return result - - -class Harvest(multiprocessing.Process): - def __init__(self, inp_q, opt_q): - multiprocessing.Process.__init__(self) - self.inp_q = inp_q - self.opt_q = opt_q - - def run(self): - import numpy as np - import pyworld - - while 1: - idx, x, res_f0, n_cpu, ts = self.inp_q.get() - f0, t = pyworld.harvest( - x.astype(np.double), - fs=16000, - f0_ceil=1100, - f0_floor=50, - frame_period=10, - ) - res_f0[idx] = f0 - if len(res_f0.keys()) >= n_cpu: - self.opt_q.put(ts) - - -if __name__ == "__main__": - import json - import multiprocessing - import re - import threading - import time - import traceback - from multiprocessing import Queue, cpu_count - from queue import Empty - - import librosa - from tools.torchgate import TorchGate - import numpy as np - import FreeSimpleGUI as sg - import sounddevice as sd - import torch - import torch.nn.functional as F - import torchaudio.transforms as tat - - from infer.lib import rtrvc as rvc_for_realtime - from i18n.i18n import I18nAuto - from configs.config import Config - - i18n = I18nAuto() - - # device = rvc_for_realtime.config.device - # device = torch.device( - # "cuda" - # if torch.cuda.is_available() - # else ("mps" if torch.backends.mps.is_available() else "cpu") - # ) - current_dir = os.getcwd() - inp_q = Queue() - opt_q = Queue() - n_cpu = min(cpu_count(), 8) - for _ in range(n_cpu): - p = Harvest(inp_q, opt_q) - p.daemon = True - p.start() - - class GUIConfig: - def __init__(self) -> None: - self.pth_path: str = "" - self.index_path: str = "" - self.pitch: int = 0 - self.formant=0.0 - self.sr_type: str = "sr_model" - self.block_time: float = 0.25 # s - self.threhold: int = -60 - self.crossfade_time: float = 0.05 - self.extra_time: float = 2.5 - self.I_noise_reduce: bool = False - self.O_noise_reduce: bool = False - self.use_pv: bool = False - self.rms_mix_rate: float = 0.0 - self.index_rate: float = 0.0 - self.n_cpu: int = min(n_cpu, 4) - self.f0method: str = "fcpe" - self.sg_hostapi: str = "" - self.wasapi_exclusive: bool = False - self.sg_input_device: str = "" - self.sg_output_device: str = "" - - class GUI: - def __init__(self) -> None: - self.gui_config = GUIConfig() - self.config = Config() - self.function = "vc" - self.delay_time = 0 - self.hostapis = None - self.input_devices = None - self.output_devices = None - self.input_devices_indices = None - self.output_devices_indices = None - self.stream = None - self.update_devices() - self.launcher() - - def load(self): - try: - if not os.path.exists("configs/inuse/config.json"): - shutil.copy("configs/config.json", "configs/inuse/config.json") - with open("configs/inuse/config.json", "r") as j: - data = json.load(j) - data["sr_model"] = data["sr_type"] == "sr_model" - data["sr_device"] = data["sr_type"] == "sr_device" - data["pm"] = data["f0method"] == "pm" - data["harvest"] = data["f0method"] == "harvest" - data["crepe"] = data["f0method"] == "crepe" - data["rmvpe"] = data["f0method"] == "rmvpe" - data["fcpe"] = data["f0method"] == "fcpe" - if data["sg_hostapi"] in self.hostapis: - self.update_devices(hostapi_name=data["sg_hostapi"]) - if ( - data["sg_input_device"] not in self.input_devices - or data["sg_output_device"] not in self.output_devices - ): - self.update_devices() - data["sg_hostapi"] = self.hostapis[0] - data["sg_input_device"] = self.input_devices[ - self.input_devices_indices.index(sd.default.device[0]) - ] - data["sg_output_device"] = self.output_devices[ - self.output_devices_indices.index(sd.default.device[1]) - ] - else: - data["sg_hostapi"] = self.hostapis[0] - data["sg_input_device"] = self.input_devices[ - self.input_devices_indices.index(sd.default.device[0]) - ] - data["sg_output_device"] = self.output_devices[ - self.output_devices_indices.index(sd.default.device[1]) - ] - except: - with open("configs/inuse/config.json", "w") as j: - data = { - "pth_path": "", - "index_path": "", - "sg_hostapi": self.hostapis[0], - "sg_wasapi_exclusive": False, - "sg_input_device": self.input_devices[ - self.input_devices_indices.index(sd.default.device[0]) - ], - "sg_output_device": self.output_devices[ - self.output_devices_indices.index(sd.default.device[1]) - ], - "sr_type": "sr_model", - "threhold": -60, - "pitch": 0, - "formant": 0.0, - "index_rate": 0, - "rms_mix_rate": 0, - "block_time": 0.25, - "crossfade_length": 0.05, - "extra_time": 2.5, - "n_cpu": 4, - "f0method": "rmvpe", - "use_jit": False, - "use_pv": False, - } - data["sr_model"] = data["sr_type"] == "sr_model" - data["sr_device"] = data["sr_type"] == "sr_device" - data["pm"] = data["f0method"] == "pm" - data["harvest"] = data["f0method"] == "harvest" - data["crepe"] = data["f0method"] == "crepe" - data["rmvpe"] = data["f0method"] == "rmvpe" - data["fcpe"] = data["f0method"] == "fcpe" - return data - - def launcher(self): - data = self.load() - self.config.use_jit = False # data.get("use_jit", self.config.use_jit) - sg.theme("LightBlue3") - layout = [ - [ - sg.Frame( - title=i18n("加载模型"), - layout=[ - [ - sg.Input( - default_text=data.get("pth_path", ""), - key="pth_path", - ), - sg.FileBrowse( - i18n("选择.pth文件"), - initial_folder=os.path.join( - os.getcwd(), "assets/weights" - ), - file_types=((". pth"),), - ), - ], - [ - sg.Input( - default_text=data.get("index_path", ""), - key="index_path", - ), - sg.FileBrowse( - i18n("选择.index文件"), - initial_folder=os.path.join(os.getcwd(), "logs"), - file_types=((". index"),), - ), - ], - ], - ) - ], - [ - sg.Frame( - layout=[ - [ - sg.Text(i18n("设备类型")), - sg.Combo( - self.hostapis, - key="sg_hostapi", - default_value=data.get("sg_hostapi", ""), - enable_events=True, - size=(20, 1), - ), - sg.Checkbox( - i18n("独占 WASAPI 设备"), - key="sg_wasapi_exclusive", - default=data.get("sg_wasapi_exclusive", False), - enable_events=True, - ), - ], - [ - sg.Text(i18n("输入设备")), - sg.Combo( - self.input_devices, - key="sg_input_device", - default_value=data.get("sg_input_device", ""), - enable_events=True, - size=(45, 1), - ), - ], - [ - sg.Text(i18n("输出设备")), - sg.Combo( - self.output_devices, - key="sg_output_device", - default_value=data.get("sg_output_device", ""), - enable_events=True, - size=(45, 1), - ), - ], - [ - sg.Button(i18n("重载设备列表"), key="reload_devices"), - sg.Radio( - i18n("使用模型采样率"), - "sr_type", - key="sr_model", - default=data.get("sr_model", True), - enable_events=True, - ), - sg.Radio( - i18n("使用设备采样率"), - "sr_type", - key="sr_device", - default=data.get("sr_device", False), - enable_events=True, - ), - sg.Text(i18n("采样率:")), - sg.Text("", key="sr_stream"), - ], - ], - title=i18n("音频设备"), - ) - ], - [ - sg.Frame( - layout=[ - [ - sg.Text(i18n("响应阈值")), - sg.Slider( - range=(-60, 0), - key="threhold", - resolution=1, - orientation="h", - default_value=data.get("threhold", -60), - enable_events=True, - ), - ], - [ - sg.Text(i18n("音调设置")), - sg.Slider( - range=(-16, 16), - key="pitch", - resolution=1, - orientation="h", - default_value=data.get("pitch", 0), - enable_events=True, - ), - ], - [ - sg.Text(i18n("性别因子/声线粗细")), - sg.Slider( - range=(-2, 2), - key="formant", - resolution=0.05, - orientation="h", - default_value=data.get("formant", 0.0), - enable_events=True, - ), - ], - [ - sg.Text(i18n("Index Rate")), - sg.Slider( - range=(0.0, 1.0), - key="index_rate", - resolution=0.01, - orientation="h", - default_value=data.get("index_rate", 0), - enable_events=True, - ), - ], - [ - sg.Text(i18n("响度因子")), - sg.Slider( - range=(0.0, 1.0), - key="rms_mix_rate", - resolution=0.01, - orientation="h", - default_value=data.get("rms_mix_rate", 0), - enable_events=True, - ), - ], - [ - sg.Text(i18n("音高算法")), - sg.Radio( - "pm", - "f0method", - key="pm", - default=data.get("pm", False), - enable_events=True, - ), - sg.Radio( - "harvest", - "f0method", - key="harvest", - default=data.get("harvest", False), - enable_events=True, - ), - sg.Radio( - "crepe", - "f0method", - key="crepe", - default=data.get("crepe", False), - enable_events=True, - ), - sg.Radio( - "rmvpe", - "f0method", - key="rmvpe", - default=data.get("rmvpe", False), - enable_events=True, - ), - sg.Radio( - "fcpe", - "f0method", - key="fcpe", - default=data.get("fcpe", True), - enable_events=True, - ), - ], - ], - title=i18n("常规设置"), - ), - sg.Frame( - layout=[ - [ - sg.Text(i18n("采样长度")), - sg.Slider( - range=(0.02, 1.5), - key="block_time", - resolution=0.01, - orientation="h", - default_value=data.get("block_time", 0.25), - enable_events=True, - ), - ], - # [ - # sg.Text("设备延迟"), - # sg.Slider( - # range=(0, 1), - # key="device_latency", - # resolution=0.001, - # orientation="h", - # default_value=data.get("device_latency", 0.1), - # enable_events=True, - # ), - # ], - [ - sg.Text(i18n("harvest进程数")), - sg.Slider( - range=(1, n_cpu), - key="n_cpu", - resolution=1, - orientation="h", - default_value=data.get( - "n_cpu", min(self.gui_config.n_cpu, n_cpu) - ), - enable_events=True, - ), - ], - [ - sg.Text(i18n("淡入淡出长度")), - sg.Slider( - range=(0.01, 0.15), - key="crossfade_length", - resolution=0.01, - orientation="h", - default_value=data.get("crossfade_length", 0.05), - enable_events=True, - ), - ], - [ - sg.Text(i18n("额外推理时长")), - sg.Slider( - range=(0.05, 5.00), - key="extra_time", - resolution=0.01, - orientation="h", - default_value=data.get("extra_time", 2.5), - enable_events=True, - ), - ], - [ - sg.Checkbox( - i18n("输入降噪"), - key="I_noise_reduce", - enable_events=True, - ), - sg.Checkbox( - i18n("输出降噪"), - key="O_noise_reduce", - enable_events=True, - ), - sg.Checkbox( - i18n("启用相位声码器"), - key="use_pv", - default=data.get("use_pv", False), - enable_events=True, - ), - # sg.Checkbox( - # "JIT加速", - # default=self.config.use_jit, - # key="use_jit", - # enable_events=False, - # ), - ], - # [sg.Text("注:首次使用JIT加速时,会出现卡顿,\n 并伴随一些噪音,但这是正常现象!")], - ], - title=i18n("性能设置"), - ), - ], - [ - sg.Button(i18n("开始音频转换"), key="start_vc"), - sg.Button(i18n("停止音频转换"), key="stop_vc"), - sg.Radio( - i18n("输入监听"), - "function", - key="im", - default=False, - enable_events=True, - ), - sg.Radio( - i18n("输出变声"), - "function", - key="vc", - default=True, - enable_events=True, - ), - sg.Text(i18n("算法延迟(ms):")), - sg.Text("0", key="delay_time"), - sg.Text(i18n("推理时间(ms):")), - sg.Text("0", key="infer_time"), - ], - ] - self.window = sg.Window("RVC - GUI", layout=layout, finalize=True) - self.event_handler() - - def event_handler(self): - global flag_vc - while True: - event, values = self.window.read() - if event == sg.WINDOW_CLOSED: - self.stop_stream() - exit() - if event == "reload_devices" or event == "sg_hostapi": - self.gui_config.sg_hostapi = values["sg_hostapi"] - self.update_devices(hostapi_name=values["sg_hostapi"]) - if self.gui_config.sg_hostapi not in self.hostapis: - self.gui_config.sg_hostapi = self.hostapis[0] - self.window["sg_hostapi"].Update(values=self.hostapis) - self.window["sg_hostapi"].Update(value=self.gui_config.sg_hostapi) - if ( - self.gui_config.sg_input_device not in self.input_devices - and len(self.input_devices) > 0 - ): - self.gui_config.sg_input_device = self.input_devices[0] - self.window["sg_input_device"].Update(values=self.input_devices) - self.window["sg_input_device"].Update( - value=self.gui_config.sg_input_device - ) - if self.gui_config.sg_output_device not in self.output_devices: - self.gui_config.sg_output_device = self.output_devices[0] - self.window["sg_output_device"].Update(values=self.output_devices) - self.window["sg_output_device"].Update( - value=self.gui_config.sg_output_device - ) - if event == "start_vc" and not flag_vc: - if self.set_values(values) == True: - printt("cuda_is_available: %s", torch.cuda.is_available()) - self.start_vc() - settings = { - "pth_path": values["pth_path"], - "index_path": values["index_path"], - "sg_hostapi": values["sg_hostapi"], - "sg_wasapi_exclusive": values["sg_wasapi_exclusive"], - "sg_input_device": values["sg_input_device"], - "sg_output_device": values["sg_output_device"], - "sr_type": ["sr_model", "sr_device"][ - [ - values["sr_model"], - values["sr_device"], - ].index(True) - ], - "threhold": values["threhold"], - "pitch": values["pitch"], - "rms_mix_rate": values["rms_mix_rate"], - "index_rate": values["index_rate"], - # "device_latency": values["device_latency"], - "block_time": values["block_time"], - "crossfade_length": values["crossfade_length"], - "extra_time": values["extra_time"], - "n_cpu": values["n_cpu"], - # "use_jit": values["use_jit"], - "use_jit": False, - "use_pv": values["use_pv"], - "f0method": ["pm", "harvest", "crepe", "rmvpe", "fcpe"][ - [ - values["pm"], - values["harvest"], - values["crepe"], - values["rmvpe"], - values["fcpe"], - ].index(True) - ], - } - with open("configs/inuse/config.json", "w") as j: - json.dump(settings, j) - if self.stream is not None: - self.delay_time = ( - self.stream.latency[-1] - + values["block_time"] - + values["crossfade_length"] - + 0.01 - ) - if values["I_noise_reduce"]: - self.delay_time += min(values["crossfade_length"], 0.04) - self.window["sr_stream"].update(self.gui_config.samplerate) - self.window["delay_time"].update( - int(np.round(self.delay_time * 1000)) - ) - # Parameter hot update - if event == "threhold": - self.gui_config.threhold = values["threhold"] - elif event == "pitch": - self.gui_config.pitch = values["pitch"] - if hasattr(self, "rvc"): - self.rvc.change_key(values["pitch"]) - elif event == "formant": - self.gui_config.formant = values["formant"] - if hasattr(self, "rvc"): - self.rvc.change_formant(values["formant"]) - elif event == "index_rate": - self.gui_config.index_rate = values["index_rate"] - if hasattr(self, "rvc"): - self.rvc.change_index_rate(values["index_rate"]) - elif event == "rms_mix_rate": - self.gui_config.rms_mix_rate = values["rms_mix_rate"] - elif event in ["pm", "harvest", "crepe", "rmvpe", "fcpe"]: - self.gui_config.f0method = event - elif event == "I_noise_reduce": - self.gui_config.I_noise_reduce = values["I_noise_reduce"] - if self.stream is not None: - self.delay_time += ( - 1 if values["I_noise_reduce"] else -1 - ) * min(values["crossfade_length"], 0.04) - self.window["delay_time"].update( - int(np.round(self.delay_time * 1000)) - ) - elif event == "O_noise_reduce": - self.gui_config.O_noise_reduce = values["O_noise_reduce"] - elif event == "use_pv": - self.gui_config.use_pv = values["use_pv"] - elif event in ["vc", "im"]: - self.function = event - elif event == "stop_vc" or event != "start_vc": - # Other parameters do not support hot update - self.stop_stream() - - def set_values(self, values): - if len(values["pth_path"].strip()) == 0: - sg.popup(i18n("请选择pth文件")) - return False - if len(values["index_path"].strip()) == 0: - sg.popup(i18n("请选择index文件")) - return False - pattern = re.compile("[^\x00-\x7F]+") - if pattern.findall(values["pth_path"]): - sg.popup(i18n("pth文件路径不可包含中文")) - return False - if pattern.findall(values["index_path"]): - sg.popup(i18n("index文件路径不可包含中文")) - return False - self.set_devices(values["sg_input_device"], values["sg_output_device"]) - self.config.use_jit = False # values["use_jit"] - # self.device_latency = values["device_latency"] - self.gui_config.sg_hostapi = values["sg_hostapi"] - self.gui_config.sg_wasapi_exclusive = values["sg_wasapi_exclusive"] - self.gui_config.sg_input_device = values["sg_input_device"] - self.gui_config.sg_output_device = values["sg_output_device"] - self.gui_config.pth_path = values["pth_path"] - self.gui_config.index_path = values["index_path"] - self.gui_config.sr_type = ["sr_model", "sr_device"][ - [ - values["sr_model"], - values["sr_device"], - ].index(True) - ] - self.gui_config.threhold = values["threhold"] - self.gui_config.pitch = values["pitch"] - self.gui_config.formant = values["formant"] - self.gui_config.block_time = values["block_time"] - self.gui_config.crossfade_time = values["crossfade_length"] - self.gui_config.extra_time = values["extra_time"] - self.gui_config.I_noise_reduce = values["I_noise_reduce"] - self.gui_config.O_noise_reduce = values["O_noise_reduce"] - self.gui_config.use_pv = values["use_pv"] - self.gui_config.rms_mix_rate = values["rms_mix_rate"] - self.gui_config.index_rate = values["index_rate"] - self.gui_config.n_cpu = values["n_cpu"] - self.gui_config.f0method = ["pm", "harvest", "crepe", "rmvpe", "fcpe"][ - [ - values["pm"], - values["harvest"], - values["crepe"], - values["rmvpe"], - values["fcpe"], - ].index(True) - ] - return True - - def start_vc(self): - torch.cuda.empty_cache() - self.rvc = rvc_for_realtime.RVC( - self.gui_config.pitch, - self.gui_config.formant, - self.gui_config.pth_path, - self.gui_config.index_path, - self.gui_config.index_rate, - self.gui_config.n_cpu, - inp_q, - opt_q, - self.config, - self.rvc if hasattr(self, "rvc") else None, - ) - self.gui_config.samplerate = ( - self.rvc.tgt_sr - if self.gui_config.sr_type == "sr_model" - else self.get_device_samplerate() - ) - self.gui_config.channels = self.get_device_channels() - self.zc = self.gui_config.samplerate // 100 - self.block_frame = ( - int( - np.round( - self.gui_config.block_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.block_frame_16k = 160 * self.block_frame // self.zc - self.crossfade_frame = ( - int( - np.round( - self.gui_config.crossfade_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.sola_buffer_frame = min(self.crossfade_frame, 4 * self.zc) - self.sola_search_frame = self.zc - self.extra_frame = ( - int( - np.round( - self.gui_config.extra_time - * self.gui_config.samplerate - / self.zc - ) - ) - * self.zc - ) - self.input_wav: torch.Tensor = torch.zeros( - self.extra_frame - + self.crossfade_frame - + self.sola_search_frame - + self.block_frame, - device=self.config.device, - dtype=torch.float32, - ) - self.input_wav_denoise: torch.Tensor = self.input_wav.clone() - self.input_wav_res: torch.Tensor = torch.zeros( - 160 * self.input_wav.shape[0] // self.zc, - device=self.config.device, - dtype=torch.float32, - ) - self.rms_buffer: np.ndarray = np.zeros(4 * self.zc, dtype="float32") - self.sola_buffer: torch.Tensor = torch.zeros( - self.sola_buffer_frame, device=self.config.device, dtype=torch.float32 - ) - self.nr_buffer: torch.Tensor = self.sola_buffer.clone() - self.output_buffer: torch.Tensor = self.input_wav.clone() - self.skip_head = self.extra_frame // self.zc - self.return_length = ( - self.block_frame + self.sola_buffer_frame + self.sola_search_frame - ) // self.zc - self.fade_in_window: torch.Tensor = ( - torch.sin( - 0.5 - * np.pi - * torch.linspace( - 0.0, - 1.0, - steps=self.sola_buffer_frame, - device=self.config.device, - dtype=torch.float32, - ) - ) - ** 2 - ) - self.fade_out_window: torch.Tensor = 1 - self.fade_in_window - self.resampler = tat.Resample( - orig_freq=self.gui_config.samplerate, - new_freq=16000, - dtype=torch.float32, - ).to(self.config.device) - if self.rvc.tgt_sr != self.gui_config.samplerate: - self.resampler2 = tat.Resample( - orig_freq=self.rvc.tgt_sr, - new_freq=self.gui_config.samplerate, - dtype=torch.float32, - ).to(self.config.device) - else: - self.resampler2 = None - self.tg = TorchGate( - sr=self.gui_config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9 - ).to(self.config.device) - self.start_stream() - - def start_stream(self): - global flag_vc - if not flag_vc: - flag_vc = True - if ( - "WASAPI" in self.gui_config.sg_hostapi - and self.gui_config.sg_wasapi_exclusive - ): - extra_settings = sd.WasapiSettings(exclusive=True) - else: - extra_settings = None - self.stream = sd.Stream( - callback=self.audio_callback, - blocksize=self.block_frame, - samplerate=self.gui_config.samplerate, - channels=self.gui_config.channels, - dtype="float32", - extra_settings=extra_settings, - ) - self.stream.start() - - def stop_stream(self): - global flag_vc - if flag_vc: - flag_vc = False - if self.stream is not None: - self.stream.abort() - self.stream.close() - self.stream = None - - def audio_callback( - self, indata: np.ndarray, outdata: np.ndarray, frames, times, status - ): - """ - 音频处理 - """ - global flag_vc - start_time = time.perf_counter() - indata = librosa.to_mono(indata.T) - if self.gui_config.threhold > -60: - indata = np.append(self.rms_buffer, indata) - rms = librosa.feature.rms( - y=indata, frame_length=4 * self.zc, hop_length=self.zc - )[:, 2:] - self.rms_buffer[:] = indata[-4 * self.zc :] - indata = indata[2 * self.zc - self.zc // 2 :] - db_threhold = ( - librosa.amplitude_to_db(rms, ref=1.0)[0] < self.gui_config.threhold - ) - for i in range(db_threhold.shape[0]): - if db_threhold[i]: - indata[i * self.zc : (i + 1) * self.zc] = 0 - indata = indata[self.zc // 2 :] - self.input_wav[: -self.block_frame] = self.input_wav[ - self.block_frame : - ].clone() - self.input_wav[-indata.shape[0] :] = torch.from_numpy(indata).to( - self.config.device - ) - self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[ - self.block_frame_16k : - ].clone() - # input noise reduction and resampling - if self.gui_config.I_noise_reduce: - self.input_wav_denoise[: -self.block_frame] = self.input_wav_denoise[ - self.block_frame : - ].clone() - input_wav = self.input_wav[-self.sola_buffer_frame - self.block_frame :] - input_wav = self.tg( - input_wav.unsqueeze(0), self.input_wav.unsqueeze(0) - ).squeeze(0) - input_wav[: self.sola_buffer_frame] *= self.fade_in_window - input_wav[: self.sola_buffer_frame] += ( - self.nr_buffer * self.fade_out_window - ) - self.input_wav_denoise[-self.block_frame :] = input_wav[ - : self.block_frame - ] - self.nr_buffer[:] = input_wav[self.block_frame :] - self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler( - self.input_wav_denoise[-self.block_frame - 2 * self.zc :] - )[160:] - else: - self.input_wav_res[-160 * (indata.shape[0] // self.zc + 1) :] = ( - self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[ - 160: - ] - ) - # infer - if self.function == "vc": - infer_wav = self.rvc.infer( - self.input_wav_res, - self.block_frame_16k, - self.skip_head, - self.return_length, - self.gui_config.f0method, - ) - if self.resampler2 is not None: - infer_wav = self.resampler2(infer_wav) - elif self.gui_config.I_noise_reduce: - infer_wav = self.input_wav_denoise[self.extra_frame :].clone() - else: - infer_wav = self.input_wav[self.extra_frame :].clone() - # output noise reduction - if self.gui_config.O_noise_reduce and self.function == "vc": - self.output_buffer[: -self.block_frame] = self.output_buffer[ - self.block_frame : - ].clone() - self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :] - infer_wav = self.tg( - infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0) - ).squeeze(0) - # volume envelop mixing - if self.gui_config.rms_mix_rate < 1 and self.function == "vc": - if self.gui_config.I_noise_reduce: - input_wav = self.input_wav_denoise[self.extra_frame :] - else: - input_wav = self.input_wav[self.extra_frame :] - rms1 = librosa.feature.rms( - y=input_wav[: infer_wav.shape[0]].cpu().numpy(), - frame_length=4 * self.zc, - hop_length=self.zc, - ) - rms1 = torch.from_numpy(rms1).to(self.config.device) - rms1 = F.interpolate( - rms1.unsqueeze(0), - size=infer_wav.shape[0] + 1, - mode="linear", - align_corners=True, - )[0, 0, :-1] - rms2 = librosa.feature.rms( - y=infer_wav[:].cpu().numpy(), - frame_length=4 * self.zc, - hop_length=self.zc, - ) - rms2 = torch.from_numpy(rms2).to(self.config.device) - rms2 = F.interpolate( - rms2.unsqueeze(0), - size=infer_wav.shape[0] + 1, - mode="linear", - align_corners=True, - )[0, 0, :-1] - rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3) - infer_wav *= torch.pow( - rms1 / rms2, torch.tensor(1 - self.gui_config.rms_mix_rate) - ) - # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC - conv_input = infer_wav[ - None, None, : self.sola_buffer_frame + self.sola_search_frame - ] - cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :]) - cor_den = torch.sqrt( - F.conv1d( - conv_input**2, - torch.ones(1, 1, self.sola_buffer_frame, device=self.config.device), - ) - + 1e-8 - ) - if sys.platform == "darwin": - _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0]) - sola_offset = sola_offset.item() - else: - sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0]) - printt("sola_offset = %d", int(sola_offset)) - infer_wav = infer_wav[sola_offset:] - if "privateuseone" in str(self.config.device) or not self.gui_config.use_pv: - infer_wav[: self.sola_buffer_frame] *= self.fade_in_window - infer_wav[: self.sola_buffer_frame] += ( - self.sola_buffer * self.fade_out_window - ) - else: - infer_wav[: self.sola_buffer_frame] = phase_vocoder( - self.sola_buffer, - infer_wav[: self.sola_buffer_frame], - self.fade_out_window, - self.fade_in_window, - ) - self.sola_buffer[:] = infer_wav[ - self.block_frame : self.block_frame + self.sola_buffer_frame - ] - outdata[:] = ( - infer_wav[: self.block_frame] - .repeat(self.gui_config.channels, 1) - .t() - .cpu() - .numpy() - ) - total_time = time.perf_counter() - start_time - if flag_vc: - self.window["infer_time"].update(int(total_time * 1000)) - printt("Infer time: %.2f", total_time) - - def update_devices(self, hostapi_name=None): - """获取设备列表""" - global flag_vc - flag_vc = False - sd._terminate() - sd._initialize() - devices = sd.query_devices() - hostapis = sd.query_hostapis() - for hostapi in hostapis: - for device_idx in hostapi["devices"]: - devices[device_idx]["hostapi_name"] = hostapi["name"] - self.hostapis = [hostapi["name"] for hostapi in hostapis] - if hostapi_name not in self.hostapis: - hostapi_name = self.hostapis[0] - self.input_devices = [ - d["name"] - for d in devices - if d["max_input_channels"] > 0 and d["hostapi_name"] == hostapi_name - ] - self.output_devices = [ - d["name"] - for d in devices - if d["max_output_channels"] > 0 and d["hostapi_name"] == hostapi_name - ] - self.input_devices_indices = [ - d["index"] if "index" in d else d["name"] - for d in devices - if d["max_input_channels"] > 0 and d["hostapi_name"] == hostapi_name - ] - self.output_devices_indices = [ - d["index"] if "index" in d else d["name"] - for d in devices - if d["max_output_channels"] > 0 and d["hostapi_name"] == hostapi_name - ] - - def set_devices(self, input_device, output_device): - """设置输出设备""" - sd.default.device[0] = self.input_devices_indices[ - self.input_devices.index(input_device) - ] - sd.default.device[1] = self.output_devices_indices[ - self.output_devices.index(output_device) - ] - printt("Input device: %s:%s", str(sd.default.device[0]), input_device) - printt("Output device: %s:%s", str(sd.default.device[1]), output_device) - - def get_device_samplerate(self): - return int( - sd.query_devices(device=sd.default.device[0])["default_samplerate"] - ) - - def get_device_channels(self): - max_input_channels = sd.query_devices(device=sd.default.device[0])[ - "max_input_channels" - ] - max_output_channels = sd.query_devices(device=sd.default.device[1])[ - "max_output_channels" - ] - return min(max_input_channels, max_output_channels, 2) - - gui = GUI() diff --git a/poetry.lock b/poetry.lock index 612c5d4..5921399 100644 --- a/poetry.lock +++ b/poetry.lock @@ -174,13 +174,13 @@ doc = ["docutils", "jinja2", "myst-parser", "numpydoc", "pillow (>=9,<10)", "pyd [[package]] name = "antlr4-python3-runtime" -version = "4.8" -description = "ANTLR 4.8 runtime for Python 3.7" +version = "4.9.3" +description = "ANTLR 4.9.3 runtime for Python 3.7" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "antlr4-python3-runtime-4.8.tar.gz", hash = "sha256:15793f5d0512a372b4e7d2284058ad32ce7dd27126b105fb0b2245130445db33"}, + {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, ] [[package]] @@ -204,23 +204,6 @@ doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphin test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] trio = ["trio (>=0.23)"] -[[package]] -name = "aria2" -version = "0.0.1b0" -description = "python wheel for aria2 static build." -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "aria2-0.0.1b0-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:349f9fe693d900453d2c1a885cab761a0fce6b92ac7e390df5da32edbb5018e5"}, - {file = "aria2-0.0.1b0-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:80e6626d297e8d391f483e173abc193a7232a74b26c108c89250e697ced02596"}, - {file = "aria2-0.0.1b0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:8bbbeaa1bc1a0b30ccdd7909e786821fb345071ba0fe14157a0f715a761e1691"}, - {file = "aria2-0.0.1b0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:0f87a26658c44668d4aadaf50c0342c63b04814032f51bf533bdeacc024ed1e2"}, - {file = "aria2-0.0.1b0-py3-none-win32.whl", hash = "sha256:4e73cfb52fd61aedfb7e8e23942b50eebb3081c452ece8acb682622dfcdb709d"}, - {file = "aria2-0.0.1b0-py3-none-win_amd64.whl", hash = "sha256:00aa00c9f8b709d37849a84760709d31b6c7752007f645f8bea175325c0b14cb"}, - {file = "aria2-0.0.1b0.tar.gz", hash = "sha256:332b31b5caafb381c43112de01f01cade19de5c68b515c37d971bb147edf56f8"}, -] - [[package]] name = "attrs" version = "24.2.0" @@ -866,34 +849,39 @@ files = [ [[package]] name = "fairseq" -version = "0.12.2" +version = "0.12.3" description = "Facebook AI Research Sequence-to-Sequence Toolkit" optional = false python-versions = "*" groups = ["main"] -files = [ - {file = "fairseq-0.12.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fe65b07c5121b7cda0c7a17166994a6b0059259ce37881b6daa117b8c209b662"}, - {file = "fairseq-0.12.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0543905012e39f00bd8c3f3781d9f49e76ab309801eb2eb7de250f5984df0de3"}, - {file = "fairseq-0.12.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4877d65346797fc580a3a7e6e2364d2331a0026ef099c22eb8311441e49c2c6"}, - {file = "fairseq-0.12.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:26454f334ca705c67f898846dff34e14c148fcdaf53b4f52d64209773b509347"}, - {file = "fairseq-0.12.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3b8c8b6dc368d2fd23a06ff613a2af05959eee275fe90846d7cffef4a43c522a"}, - {file = "fairseq-0.12.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:08fa308c760f995cdc13d9c385e2b9d923a78b48275d8b4d78f3a854c71a8f29"}, - {file = "fairseq-0.12.2.tar.gz", hash = "sha256:34f1b18426bf3844714534162f065ab733e049597476daa35fffb4d06a92b524"}, -] +files = [] +develop = false [package.dependencies] bitarray = "*" cffi = "*" cython = "*" -hydra-core = ">=1.0.7,<1.1" -numpy = {version = "*", markers = "python_version >= \"3.7\""} -omegaconf = "<2.1" +hydra-core = ">=1.3.2" +numpy = ">=1.21.3" +omegaconf = "*" +packaging = "*" regex = "*" sacrebleu = ">=1.4.12" -torch = "*" +scikit-learn = "*" +torch = ">=1.13" torchaudio = ">=0.8.0" tqdm = "*" +[package.extras] +dev = ["black (==22.3.0)", "flake8", "pytest"] +docs = ["sphinx", "sphinx-argparse"] + +[package.source] +type = "git" +url = "https://github.com/One-sixth/fairseq.git" +reference = "HEAD" +resolved_reference = "44800430a728c2216fd1cf1e8daa672f50dfacba" + [[package]] name = "faiss-cpu" version = "1.7.3" @@ -1445,19 +1433,20 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "hydra-core" -version = "1.0.7" +version = "1.3.2" description = "A framework for elegantly configuring complex applications" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "hydra-core-1.0.7.tar.gz", hash = "sha256:58cc3f7531995b6d8de162ca21f936e17bdaebd4d1e8614d63c32e17c2e41e45"}, - {file = "hydra_core-1.0.7-py3-none-any.whl", hash = "sha256:e800c6deb8309395508094851fa93bc13408f2285261eb97e626d37193b58a9f"}, + {file = "hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"}, + {file = "hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"}, ] [package.dependencies] -antlr4-python3-runtime = "4.8" -omegaconf = ">=2.0.5,<2.1" +antlr4-python3-runtime = "==4.9.*" +omegaconf = ">=2.2,<2.4" +packaging = "*" [[package]] name = "idna" @@ -1665,34 +1654,56 @@ files = [ ] [[package]] -name = "librosa" -version = "0.9.1" -description = "Python module for audio and music processing" +name = "lazy-loader" +version = "0.4" +description = "Makes it easy to load subpackages and functions on demand." optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" groups = ["main"] files = [ - {file = "librosa-0.9.1-py3-none-any.whl", hash = "sha256:c2bb61a8008367cca89a3f1dad352d8e55fe5ca5f7414fb5d5258eb52765db33"}, - {file = "librosa-0.9.1.tar.gz", hash = "sha256:7ed5d6e3f4546e5e3c2840691f9ddc56878f914a35a50060df5fca2b26d4b614"}, + {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"}, + {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"}, ] [package.dependencies] -audioread = ">=2.1.5" -decorator = ">=4.0.10" +packaging = "*" + +[package.extras] +dev = ["changelist (==0.5)"] +lint = ["pre-commit (==3.7.0)"] +test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"] + +[[package]] +name = "librosa" +version = "0.10.2" +description = "Python module for audio and music processing" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "librosa-0.10.2-py3-none-any.whl", hash = "sha256:469470592c51533e22d3caa1356c735de9bb0a2e9d29345ab4a1ed52e85f7dc8"}, + {file = "librosa-0.10.2.tar.gz", hash = "sha256:444693b66b675f7e2f209cfc4f1226f54a3461f8568816e5a1add068101c3888"}, +] + +[package.dependencies] +audioread = ">=2.1.9" +decorator = ">=4.3.0" joblib = ">=0.14" -numba = ">=0.45.1" -numpy = ">=1.17.0" -packaging = ">=20.0" -pooch = ">=1.0" -resampy = ">=0.2.2" -scikit-learn = ">=0.19.1" +lazy-loader = ">=0.1" +msgpack = ">=1.0" +numba = ">=0.51.0" +numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2" +pooch = ">=1.1" +scikit-learn = ">=0.20.0" scipy = ">=1.2.0" -soundfile = ">=0.10.2" +soundfile = ">=0.12.1" +soxr = ">=0.3.2" +typing-extensions = ">=4.1.1" [package.extras] display = ["matplotlib (>=3.3.0)"] -docs = ["ipython (>=7.0)", "matplotlib (>=3.3.0)", "mir-eval (>=0.5)", "numba (<0.50)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (==0.5.*)", "sphinxcontrib-svg2pdfconverter"] -tests = ["contextlib2", "matplotlib (>=3.3.0)", "pytest", "pytest-cov", "pytest-mpl", "samplerate", "soxr"] +docs = ["ipython (>=7.0)", "matplotlib (>=3.3.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"] +tests = ["matplotlib (>=3.3.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"] [[package]] name = "linkify-it-py" @@ -2147,6 +2158,80 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] +[[package]] +name = "msgpack" +version = "1.1.0" +description = "MessagePack serializer" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "msgpack-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ad442d527a7e358a469faf43fda45aaf4ac3249c8310a82f0ccff9164e5dccd"}, + {file = "msgpack-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:74bed8f63f8f14d75eec75cf3d04ad581da6b914001b474a5d3cd3372c8cc27d"}, + {file = "msgpack-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:914571a2a5b4e7606997e169f64ce53a8b1e06f2cf2c3a7273aa106236d43dd5"}, + {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c921af52214dcbb75e6bdf6a661b23c3e6417f00c603dd2070bccb5c3ef499f5"}, + {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8ce0b22b890be5d252de90d0e0d119f363012027cf256185fc3d474c44b1b9e"}, + {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:73322a6cc57fcee3c0c57c4463d828e9428275fb85a27aa2aa1a92fdc42afd7b"}, + {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e1f3c3d21f7cf67bcf2da8e494d30a75e4cf60041d98b3f79875afb5b96f3a3f"}, + {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:64fc9068d701233effd61b19efb1485587560b66fe57b3e50d29c5d78e7fef68"}, + {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:42f754515e0f683f9c79210a5d1cad631ec3d06cea5172214d2176a42e67e19b"}, + {file = "msgpack-1.1.0-cp310-cp310-win32.whl", hash = "sha256:3df7e6b05571b3814361e8464f9304c42d2196808e0119f55d0d3e62cd5ea044"}, + {file = "msgpack-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:685ec345eefc757a7c8af44a3032734a739f8c45d1b0ac45efc5d8977aa4720f"}, + {file = "msgpack-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3d364a55082fb2a7416f6c63ae383fbd903adb5a6cf78c5b96cc6316dc1cedc7"}, + {file = "msgpack-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:79ec007767b9b56860e0372085f8504db5d06bd6a327a335449508bbee9648fa"}, + {file = "msgpack-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6ad622bf7756d5a497d5b6836e7fc3752e2dd6f4c648e24b1803f6048596f701"}, + {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e59bca908d9ca0de3dc8684f21ebf9a690fe47b6be93236eb40b99af28b6ea6"}, + {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1da8f11a3dd397f0a32c76165cf0c4eb95b31013a94f6ecc0b280c05c91b59"}, + {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:452aff037287acb1d70a804ffd022b21fa2bb7c46bee884dbc864cc9024128a0"}, + {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8da4bf6d54ceed70e8861f833f83ce0814a2b72102e890cbdfe4b34764cdd66e"}, + {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:41c991beebf175faf352fb940bf2af9ad1fb77fd25f38d9142053914947cdbf6"}, + {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a52a1f3a5af7ba1c9ace055b659189f6c669cf3657095b50f9602af3a3ba0fe5"}, + {file = "msgpack-1.1.0-cp311-cp311-win32.whl", hash = "sha256:58638690ebd0a06427c5fe1a227bb6b8b9fdc2bd07701bec13c2335c82131a88"}, + {file = "msgpack-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd2906780f25c8ed5d7b323379f6138524ba793428db5d0e9d226d3fa6aa1788"}, + {file = "msgpack-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d46cf9e3705ea9485687aa4001a76e44748b609d260af21c4ceea7f2212a501d"}, + {file = "msgpack-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5dbad74103df937e1325cc4bfeaf57713be0b4f15e1c2da43ccdd836393e2ea2"}, + {file = "msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58dfc47f8b102da61e8949708b3eafc3504509a5728f8b4ddef84bd9e16ad420"}, + {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676e5be1b472909b2ee6356ff425ebedf5142427842aa06b4dfd5117d1ca8a2"}, + {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17fb65dd0bec285907f68b15734a993ad3fc94332b5bb21b0435846228de1f39"}, + {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a51abd48c6d8ac89e0cfd4fe177c61481aca2d5e7ba42044fd218cfd8ea9899f"}, + {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2137773500afa5494a61b1208619e3871f75f27b03bcfca7b3a7023284140247"}, + {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:398b713459fea610861c8a7b62a6fec1882759f308ae0795b5413ff6a160cf3c"}, + {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b"}, + {file = "msgpack-1.1.0-cp312-cp312-win32.whl", hash = "sha256:ad33e8400e4ec17ba782f7b9cf868977d867ed784a1f5f2ab46e7ba53b6e1e1b"}, + {file = "msgpack-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:115a7af8ee9e8cddc10f87636767857e7e3717b7a2e97379dc2054712693e90f"}, + {file = "msgpack-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf"}, + {file = "msgpack-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0f92a83b84e7c0749e3f12821949d79485971f087604178026085f60ce109330"}, + {file = "msgpack-1.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a1964df7b81285d00a84da4e70cb1383f2e665e0f1f2a7027e683956d04b734"}, + {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59caf6a4ed0d164055ccff8fe31eddc0ebc07cf7326a2aaa0dbf7a4001cd823e"}, + {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0907e1a7119b337971a689153665764adc34e89175f9a34793307d9def08e6ca"}, + {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65553c9b6da8166e819a6aa90ad15288599b340f91d18f60b2061f402b9a4915"}, + {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7a946a8992941fea80ed4beae6bff74ffd7ee129a90b4dd5cf9c476a30e9708d"}, + {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4b51405e36e075193bc051315dbf29168d6141ae2500ba8cd80a522964e31434"}, + {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4c01941fd2ff87c2a934ee6055bda4ed353a7846b8d4f341c428109e9fcde8c"}, + {file = "msgpack-1.1.0-cp313-cp313-win32.whl", hash = "sha256:7c9a35ce2c2573bada929e0b7b3576de647b0defbd25f5139dcdaba0ae35a4cc"}, + {file = "msgpack-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:bce7d9e614a04d0883af0b3d4d501171fbfca038f12c77fa838d9f198147a23f"}, + {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c40ffa9a15d74e05ba1fe2681ea33b9caffd886675412612d93ab17b58ea2fec"}, + {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1ba6136e650898082d9d5a5217d5906d1e138024f836ff48691784bbe1adf96"}, + {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0856a2b7e8dcb874be44fea031d22e5b3a19121be92a1e098f46068a11b0870"}, + {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:471e27a5787a2e3f974ba023f9e265a8c7cfd373632247deb225617e3100a3c7"}, + {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:646afc8102935a388ffc3914b336d22d1c2d6209c773f3eb5dd4d6d3b6f8c1cb"}, + {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:13599f8829cfbe0158f6456374e9eea9f44eee08076291771d8ae93eda56607f"}, + {file = "msgpack-1.1.0-cp38-cp38-win32.whl", hash = "sha256:8a84efb768fb968381e525eeeb3d92857e4985aacc39f3c47ffd00eb4509315b"}, + {file = "msgpack-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:879a7b7b0ad82481c52d3c7eb99bf6f0645dbdec5134a4bddbd16f3506947feb"}, + {file = "msgpack-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:53258eeb7a80fc46f62fd59c876957a2d0e15e6449a9e71842b6d24419d88ca1"}, + {file = "msgpack-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e7b853bbc44fb03fbdba34feb4bd414322180135e2cb5164f20ce1c9795ee48"}, + {file = "msgpack-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3e9b4936df53b970513eac1758f3882c88658a220b58dcc1e39606dccaaf01c"}, + {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46c34e99110762a76e3911fc923222472c9d681f1094096ac4102c18319e6468"}, + {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a706d1e74dd3dea05cb54580d9bd8b2880e9264856ce5068027eed09680aa74"}, + {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:534480ee5690ab3cbed89d4c8971a5c631b69a8c0883ecfea96c19118510c846"}, + {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8cf9e8c3a2153934a23ac160cc4cba0ec035f6867c8013cc6077a79823370346"}, + {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3180065ec2abbe13a4ad37688b61b99d7f9e012a535b930e0e683ad6bc30155b"}, + {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c5a91481a3cc573ac8c0d9aace09345d989dc4a0202b7fcb312c88c26d4e71a8"}, + {file = "msgpack-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f80bc7d47f76089633763f952e67f8214cb7b3ee6bfa489b3cb6a84cfac114cd"}, + {file = "msgpack-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:4d1b7ff2d6146e16e8bd665ac726a89c74163ef8cd39fa8c1087d4e52d3a2325"}, + {file = "msgpack-1.1.0.tar.gz", hash = "sha256:dd432ccc2c72b914e4cb77afce64aab761c1137cc698be3984eee260bcb2896e"}, +] + [[package]] name = "multidict" version = "6.0.5" @@ -2363,7 +2448,7 @@ description = "CUBLAS native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, @@ -2376,7 +2461,7 @@ description = "CUDA profiling tools runtime libs." optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, @@ -2389,7 +2474,7 @@ description = "NVRTC native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, @@ -2402,27 +2487,12 @@ description = "CUDA Runtime native Libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, ] -[[package]] -name = "nvidia-cudnn-cu12" -version = "8.9.2.26" -description = "cuDNN runtime libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"win32\"" -files = [ - {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"}, -] - -[package.dependencies] -nvidia-cublas-cu12 = "*" - [[package]] name = "nvidia-cudnn-cu12" version = "9.1.0.70" @@ -2430,7 +2500,7 @@ description = "cuDNN runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"win32\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"}, @@ -2446,7 +2516,7 @@ description = "CUFFT native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, @@ -2459,7 +2529,7 @@ description = "CURAND native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, @@ -2472,7 +2542,7 @@ description = "CUDA solver native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, @@ -2490,7 +2560,7 @@ description = "CUSPARSE native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, @@ -2499,18 +2569,6 @@ files = [ [package.dependencies] nvidia-nvjitlink-cu12 = "*" -[[package]] -name = "nvidia-nccl-cu12" -version = "2.18.1" -description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"win32\"" -files = [ - {file = "nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:1a6c4acefcbebfa6de320f412bf7866de856e786e0462326ba1bac40de0b5e71"}, -] - [[package]] name = "nvidia-nccl-cu12" version = "2.20.5" @@ -2518,7 +2576,7 @@ description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"win32\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, @@ -2531,7 +2589,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_aarch64.whl", hash = "sha256:84fb38465a5bc7c70cbc320cfd0963eb302ee25a5e939e9f512bbba55b6072fb"}, {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl", hash = "sha256:562ab97ea2c23164823b2a89cb328d01d45cb99634b8c65fe7cd60d14562bd79"}, @@ -2545,7 +2603,7 @@ description = "NVIDIA Tools Extension" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, @@ -2553,19 +2611,19 @@ files = [ [[package]] name = "omegaconf" -version = "2.0.6" +version = "2.3.0" description = "A flexible configuration library" optional = false python-versions = ">=3.6" groups = ["main"] files = [ - {file = "omegaconf-2.0.6-py3-none-any.whl", hash = "sha256:9e349fd76819b95b47aa628edea1ff83fed5b25108608abdd6c7fdca188e302a"}, - {file = "omegaconf-2.0.6.tar.gz", hash = "sha256:92ca535a788d21651bf4c2eaf5c1ca4c7a8003b2dab4a87cbb09109784268806"}, + {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"}, + {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"}, ] [package.dependencies] -PyYAML = ">=5.1" -typing-extensions = "*" +antlr4-python3-runtime = "==4.9.*" +PyYAML = ">=5.1.0" [[package]] name = "onnxruntime" @@ -3834,6 +3892,44 @@ cffi = ">=1.0" [package.extras] numpy = ["numpy"] +[[package]] +name = "soxr" +version = "0.5.0.post1" +description = "High quality, one-dimensional sample-rate conversion library" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "soxr-0.5.0.post1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:7406d782d85f8cf64e66b65e6b7721973de8a1dc50b9e88bc2288c343a987484"}, + {file = "soxr-0.5.0.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa0a382fb8d8e2afed2c1642723b2d2d1b9a6728ff89f77f3524034c8885b8c9"}, + {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b01d3efb95a2851f78414bcd00738b0253eec3f5a1e5482838e965ffef84969"}, + {file = "soxr-0.5.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcc049b0a151a65aa75b92f0ac64bb2dba785d16b78c31c2b94e68c141751d6d"}, + {file = "soxr-0.5.0.post1-cp310-cp310-win_amd64.whl", hash = "sha256:97f269bc26937c267a2ace43a77167d0c5c8bba5a2b45863bb6042b5b50c474e"}, + {file = "soxr-0.5.0.post1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:6fb77b626773a966e3d8f6cb24f6f74b5327fa5dc90f1ff492450e9cdc03a378"}, + {file = "soxr-0.5.0.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39e0f791ba178d69cd676485dbee37e75a34f20daa478d90341ecb7f6d9d690f"}, + {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f0b558f445ba4b64dbcb37b5f803052eee7d93b1dbbbb97b3ec1787cb5a28eb"}, + {file = "soxr-0.5.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6903671808e0a6078b0d146bb7a2952b118dfba44008b2aa60f221938ba829"}, + {file = "soxr-0.5.0.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c4d8d5283ed6f5efead0df2c05ae82c169cfdfcf5a82999c2d629c78b33775e8"}, + {file = "soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31"}, + {file = "soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32"}, + {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1"}, + {file = "soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc"}, + {file = "soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6"}, + {file = "soxr-0.5.0.post1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:c5af7b355959061beb90a1d73c4834ece4549f07b708f8c73c088153cec29935"}, + {file = "soxr-0.5.0.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1dda616fc797b1507b65486f3116ed2c929f13c722922963dd419d64ada6c07"}, + {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94de2812368e98cb42b4eaeddf8ee1657ecc19bd053f8e67b9b5aa12a3592012"}, + {file = "soxr-0.5.0.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c8e9c980637e03d3f345a4fd81d56477a58c294fb26205fa121bc4eb23d9d01"}, + {file = "soxr-0.5.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:7e71b0b0db450f36de70f1047505231db77a713f8c47df9342582ae8a4b828f2"}, + {file = "soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"] +test = ["pytest"] + [[package]] name = "starlette" version = "0.22.0" @@ -3951,101 +4047,32 @@ files = [ [[package]] name = "torch" -version = "2.1.1+cu118" +version = "2.4.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "sys_platform == \"linux\"" files = [ - {file = "torch-2.1.1+cu118-cp311-cp311-linux_x86_64.whl", hash = "sha256:f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c"}, -] - -[package.dependencies] -filelock = "*" -fsspec = "*" -jinja2 = "*" -networkx = "*" -sympy = "*" -triton = "2.1.0" -typing-extensions = "*" - -[package.extras] -dynamo = ["jinja2"] -opt-einsum = ["opt-einsum (>=3.3)"] - -[package.source] -type = "url" -url = "https://download.pytorch.org/whl/cu118/torch-2.1.1%2Bcu118-cp311-cp311-linux_x86_64.whl#sha256=f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c" - -[[package]] -name = "torch" -version = "2.1.1+cu118" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false -python-versions = ">=3.8.0" -groups = ["main"] -markers = "sys_platform == \"win32\"" -files = [ - {file = "torch-2.1.1+cu118-cp311-cp311-win_amd64.whl", hash = "sha256:d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a"}, -] - -[package.dependencies] -filelock = "*" -fsspec = "*" -jinja2 = "*" -networkx = "*" -nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nccl-cu12 = {version = "2.18.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -sympy = "*" -triton = {version = "2.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -typing-extensions = "*" - -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] - -[package.source] -type = "url" -url = "https://download.pytorch.org/whl/cu118/torch-2.1.1%2Bcu118-cp311-cp311-win_amd64.whl#sha256=d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a" - -[[package]] -name = "torch" -version = "2.4.0" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false -python-versions = ">=3.8.0" -groups = ["main"] -markers = "sys_platform != \"win32\" and sys_platform != \"linux\"" -files = [ - {file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"}, - {file = "torch-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c4ca297b7bd58b506bfd6e78ffd14eb97c0e7797dcd7965df62f50bb575d8954"}, - {file = "torch-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2497cbc7b3c951d69b276ca51fe01c2865db67040ac67f5fc20b03e41d16ea4a"}, - {file = "torch-2.4.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:685418ab93730efbee71528821ff54005596970dd497bf03c89204fb7e3f71de"}, - {file = "torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e743adadd8c8152bb8373543964551a7cb7cc20ba898dc8f9c0cdbe47c283de0"}, - {file = "torch-2.4.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7334325c0292cbd5c2eac085f449bf57d3690932eac37027e193ba775703c9e6"}, - {file = "torch-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:97730014da4c57ffacb3c09298c6ce05400606e890bd7a05008d13dd086e46b1"}, - {file = "torch-2.4.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f169b4ea6dc93b3a33319611fcc47dc1406e4dd539844dcbd2dec4c1b96e166d"}, - {file = "torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:997084a0f9784d2a89095a6dc67c7925e21bf25dea0b3d069b41195016ccfcbb"}, - {file = "torch-2.4.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:bc3988e8b36d1e8b998d143255d9408d8c75da4ab6dd0dcfd23b623dfb0f0f57"}, - {file = "torch-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:3374128bbf7e62cdaed6c237bfd39809fbcfaa576bee91e904706840c3f2195c"}, - {file = "torch-2.4.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:91aaf00bfe1ffa44dc5b52809d9a95129fca10212eca3ac26420eb11727c6288"}, - {file = "torch-2.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cc30457ea5489c62747d3306438af00c606b509d78822a88f804202ba63111ed"}, - {file = "torch-2.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a046491aaf96d1215e65e1fa85911ef2ded6d49ea34c8df4d0638879f2402eef"}, - {file = "torch-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:688eec9240f3ce775f22e1e1a5ab9894f3d5fe60f3f586deb7dbd23a46a83916"}, - {file = "torch-2.4.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:3af4de2a618fb065e78404c4ba27a818a7b7957eaeff28c6c66ce7fb504b68b8"}, - {file = "torch-2.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:618808d3f610d5f180e47a697d4ec90b810953bb1e020f424b2ac7fb0884b545"}, - {file = "torch-2.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:ed765d232d23566052ba83632ec73a4fccde00b4c94ad45d63b471b09d63b7a7"}, - {file = "torch-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2feb98ac470109472fb10dfef38622a7ee08482a16c357863ebc7bc7db7c8f7"}, - {file = "torch-2.4.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8940fc8b97a4c61fdb5d46a368f21f4a3a562a17879e932eb51a5ec62310cb31"}, + {file = "torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971"}, + {file = "torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3"}, + {file = "torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada"}, + {file = "torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd"}, + {file = "torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113"}, + {file = "torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8"}, + {file = "torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c"}, + {file = "torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea"}, + {file = "torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042"}, + {file = "torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d"}, + {file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"}, + {file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"}, + {file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"}, + {file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"}, + {file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"}, + {file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"}, + {file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"}, + {file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"}, + {file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"}, + {file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"}, ] [package.dependencies] @@ -4064,6 +4091,7 @@ nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \" nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = "*" sympy = "*" triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} typing-extensions = ">=4.8.0" @@ -4073,94 +4101,80 @@ opt-einsum = ["opt-einsum (>=3.3)"] optree = ["optree (>=0.11.0)"] [[package]] -name = "torchaudio" -version = "2.1.1+cu118" -description = "An audio package for PyTorch" +name = "torch-directml" +version = "0.2.5.dev240914" +description = "A DirectML backend for hardware acceleration in PyTorch." optional = false -python-versions = "*" +python-versions = ">=3.7" groups = ["main"] -markers = "sys_platform == \"linux\"" files = [ - {file = "torchaudio-2.1.1+cu118-cp311-cp311-linux_x86_64.whl", hash = "sha256:2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4"}, + {file = "torch_directml-0.2.5.dev240914-cp310-cp310-manylinux2010_x86_64.whl", hash = "sha256:30e29872f4d6059dd784897fa2df0cddb80f3874ed4f0860758790286e95a823"}, + {file = "torch_directml-0.2.5.dev240914-cp310-cp310-win_amd64.whl", hash = "sha256:9fe494ffd2c8ab9079f13404d052fc261cd8efb639a776c9075e58d9c64d6cb2"}, + {file = "torch_directml-0.2.5.dev240914-cp311-cp311-manylinux2010_x86_64.whl", hash = "sha256:6afb675585d30018c813e5ba203a3437073748919af8ab3e910092a0e0ec531f"}, + {file = "torch_directml-0.2.5.dev240914-cp311-cp311-win_amd64.whl", hash = "sha256:3315b6c7e898685827607f1d8170dacc386ac248502aba9bd36cf82e78d930bb"}, + {file = "torch_directml-0.2.5.dev240914-cp312-cp312-manylinux2010_x86_64.whl", hash = "sha256:26915aff5008a8567ea7641b74cf8cb53c1767d0c7163fc06e0a587e7c1c9dce"}, + {file = "torch_directml-0.2.5.dev240914-cp312-cp312-win_amd64.whl", hash = "sha256:ea19d11e33e9450b290311c06f7eb10924dd25c555e504d367b7b437d3eb24d0"}, + {file = "torch_directml-0.2.5.dev240914-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:9b263c7d88ea225ce35b116441e1585fc753e9d69f64a8b6d83aabdc6c511517"}, + {file = "torch_directml-0.2.5.dev240914-cp38-cp38-win_amd64.whl", hash = "sha256:6e18fb706d15cc6d0d3de49f46a7edc07ae669531c851d7d8f98855f4974f9e6"}, + {file = "torch_directml-0.2.5.dev240914-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:9c1988e95694520b31f1056bc038b76a62c9ca63f2d4e93f1abea1d293ec49ce"}, + {file = "torch_directml-0.2.5.dev240914-cp39-cp39-win_amd64.whl", hash = "sha256:488509f0e8deb22f052b56f5cdad3a55878b65a7d99ee4b448fb4ab3cbb8d8ea"}, ] [package.dependencies] -torch = "2.1.1" - -[package.source] -type = "url" -url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1%2Bcu118-cp311-cp311-linux_x86_64.whl#sha256=2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4" +torch = "2.4.1" +torchvision = "0.19.1" [[package]] name = "torchaudio" -version = "2.1.1+cu118" +version = "2.4.1" description = "An audio package for PyTorch" optional = false python-versions = "*" groups = ["main"] -markers = "sys_platform == \"win32\"" files = [ - {file = "torchaudio-2.1.1+cu118-cp311-cp311-win_amd64.whl", hash = "sha256:79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2"}, + {file = "torchaudio-2.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:661909751909340b24f637410dfec02a888867816c3db19ed4f4102ae105244a"}, + {file = "torchaudio-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bfc234cef1d03092ea27440fb79e486722ccb41cff94ebaf9d5a1082436395fe"}, + {file = "torchaudio-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:54431179d9a9ccf3feeae98aace07d89fae9fd728e2bc8656efbd70e7edcc6f8"}, + {file = "torchaudio-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:dec97872215c3122b7718ec47ac63e143565c3cced06444d0225e98bf4dd4b5f"}, + {file = "torchaudio-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60af1531815d22659e5412ea401bed552a16c389938c49664e446e4cfd5ddc06"}, + {file = "torchaudio-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:95a0968569f7f4455bfd242bfcd489ec47ad37d2ba0f3d9f738cd1128a5f775c"}, + {file = "torchaudio-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7640aaffb2056e12f2906187b03a22228a0908c87d0295fddf4b0b92334a290b"}, + {file = "torchaudio-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:3c08b42a0c296c8eeee6c533bcae5cfbc0ceae86a34f24fe6bbbb5faa7a7bea1"}, + {file = "torchaudio-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:953946cf610ffd57bb3fdd228effa2112fa51c5dfe36a96611effc9074a3d3be"}, + {file = "torchaudio-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:1796a8961decb522c47daab0fbe27c057d6d143ee22bb6ae0d5eb9b2a038c7b6"}, + {file = "torchaudio-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:5b62fc7b16ed708b0c07d4393137797e92f63fc3bd5705607d97ba6a9a7cf3f0"}, + {file = "torchaudio-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:d721b186aae7bd8752c9ad95213f5d650926597bb9060728dfe476986a1ff570"}, + {file = "torchaudio-2.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4ea0fd00142fe795c75bcc20a303981b56f2327c7f7d321b42a8fef1d78aafa9"}, + {file = "torchaudio-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:375d8740c8035a50faca7a5afe2fbdb712aa8733715b971b2af61b4003fa1c41"}, + {file = "torchaudio-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:74d19cf9ca3dad394afcabb7e6f7ed9ab9f59f2540d502826c7ec3e33985251d"}, + {file = "torchaudio-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:40e9fa8fdc8d328ea4aa90be65fd34c5ef975610dbd707545e3664393a8a2497"}, + {file = "torchaudio-2.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3adce550850902b9aa6cd2378ccd720ac9ec8cf31e2eba9743ccc84ffcbe76d6"}, + {file = "torchaudio-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:98d8e03703f96b13a8d172d1ccdc7badb338227fd762985fdcea6b30f6697bdb"}, + {file = "torchaudio-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:36c7e7bc6b358cbf42b769c80206780fa1497d141a985c6b3e7768de44524e9a"}, + {file = "torchaudio-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:f46e34ab3866ad8d8ace0673cd11e697c5cde6a3b7a4d8d789207d4d8badbb6e"}, ] [package.dependencies] -torch = "2.1.1+cu118" - -[package.source] -type = "url" -url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1%2Bcu118-cp311-cp311-win_amd64.whl#sha256=79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2" - -[[package]] -name = "torchaudio" -version = "2.4.0" -description = "An audio package for PyTorch" -optional = false -python-versions = "*" -groups = ["main"] -markers = "sys_platform != \"win32\" and sys_platform != \"linux\"" -files = [ - {file = "torchaudio-2.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:733e9d859b88dabefeaf008e3ab2b8c7885b29466068b4b79a42766be4619e46"}, - {file = "torchaudio-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:c48bab82a9ee0c67b9323c2ebbe0890a34c5815d1ff1ace77b1c9df4e6fdbbff"}, - {file = "torchaudio-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:17cb73d4336771d455cd8dda8b4891307a5346b890a4e6b1d4b73d565258fee1"}, - {file = "torchaudio-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:af19edc1c3c0ac626f594fc67f087db401016d9216af8d62b6c6ff731efbae43"}, - {file = "torchaudio-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:acbcf9129ffcfce808254e2cbff103363c505ce06ed4c4231b3f436a10679d4d"}, - {file = "torchaudio-2.4.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:393ee8c24110ccc8030482c10cd9d5d0b5e528f6a9dd3d60557e1151aa951b13"}, - {file = "torchaudio-2.4.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:be969c09466db35e0d79b8b09dff66caedbb9569b42c903a2d5e0db2af760e3c"}, - {file = "torchaudio-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:2993a3288b2b451bf90c7c4d65991b5769e2614d923e295f08a10066ce79d3c0"}, - {file = "torchaudio-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ae13a95ef6fabcadb0eff36d85f5048d70474a2e9704fa9c86e9903cbcec0d4a"}, - {file = "torchaudio-2.4.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:4782a49171d94431bb815a55aa72733f5fe38034bdf6adeced28c226e2cc791b"}, - {file = "torchaudio-2.4.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:534d1907bb252ecd2ba9e1d61cff7220fd66090e63df7b3c109cea77a19d4cb8"}, - {file = "torchaudio-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:473c149c1c5288f4ce7b609c5ecb7b2528e7958ea701147a20413d65e5a8a59c"}, - {file = "torchaudio-2.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fc3f8ecd6f0bbfc654d3bc52756a7ca359f1d88b4fa0290e1cdb763a3131b7b9"}, - {file = "torchaudio-2.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:2bcd9700f8ec70804cc9c48d4f6f3fa7372f52421eebb64d02c04bf805ad284d"}, - {file = "torchaudio-2.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d7fe9e7f2fe8250fde07b20356c44d770d5faa3ca277abdcda3af7d484048fba"}, - {file = "torchaudio-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:6a10d3c29097a4d81533ab79e351c93d6d91eb1584671d5eee59ba3c259be796"}, - {file = "torchaudio-2.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1fd670c808e322c101957a07651e29935f86ec389243c0c43a24edd7a1854841"}, - {file = "torchaudio-2.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1eecb83c123577779a45381de3a38e4add132a80104cff4afd816913f51ca17b"}, - {file = "torchaudio-2.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c840894de12a6dd3ea57cbb0d0086123aaa48001ba3ad99ef714fe009eae8eb9"}, - {file = "torchaudio-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:608fd609cdd8323ef4a50c1e984a0be7282a6c630fad22e040e957f8e376950e"}, -] - -[package.dependencies] -torch = "2.4.0" +torch = "2.4.1" [[package]] name = "torchcrepe" -version = "0.0.20" +version = "0.0.23" description = "Pytorch implementation of CREPE pitch tracker" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "torchcrepe-0.0.20-py3-none-any.whl", hash = "sha256:a8803df94cd7675c0ee1f8f6ca6ea78075bca0825497632914b76e0dd5b61d31"}, - {file = "torchcrepe-0.0.20.tar.gz", hash = "sha256:b4eac23d0a85582739fe69eeb3136e2fb6ebb7e5363a6556f7c9fe83b11f8e2c"}, + {file = "torchcrepe-0.0.23-py3-none-any.whl", hash = "sha256:6e104465b89e763ba7fd0d1b228162783ed4b5a6c5735772baca286c20d7ae2c"}, + {file = "torchcrepe-0.0.23.tar.gz", hash = "sha256:8f7e75638a5ab3fbb9cfc1704c173adbcb61de73dbdadcf428b3d93b54dfe57f"}, ] [package.dependencies] -librosa = "0.9.1" +librosa = ">=0.9.1" resampy = "*" scipy = "*" torch = "*" +torchaudio = "*" tqdm = "*" [[package]] @@ -4181,6 +4195,45 @@ numpy = "*" torch = "*" torchaudio = "*" +[[package]] +name = "torchvision" +version = "0.19.1" +description = "image and video datasets and models for torch deep learning" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "torchvision-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:54e8513099e6f586356c70f809d34f391af71ad182fe071cc328a28af2c40608"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:20a1f5e02bfdad7714e55fa3fa698347c11d829fa65e11e5a84df07d93350eed"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:7b063116164be52fc6deb4762de7f8c90bfa3a65f8d5caf17f8e2d5aadc75a04"}, + {file = "torchvision-0.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:f40b6acabfa886da1bc3768f47679c61feee6bde90deb979d9f300df8c8a0145"}, + {file = "torchvision-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40514282b4896d62765b8e26d7091c32e17c35817d00ec4be2362ea3ba3d1787"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:5a91be061ae5d6d5b95e833b93e57ca4d3c56c5a57444dd15da2e3e7fba96050"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d71a6a6fe3a5281ca3487d4c56ad4aad20ff70f82f1d7c79bcb6e7b0c2af00c8"}, + {file = "torchvision-0.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:70dea324174f5e9981b68e4b7cd524512c106ba64aedef560a86a0bbf2fbf62c"}, + {file = "torchvision-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27ece277ff0f6cdc7fed0627279c632dcb2e58187da771eca24b0fbcf3f8590d"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:c659ff92a61f188a1a7baef2850f3c0b6c85685447453c03d0e645ba8f1dcc1c"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:c07bf43c2a145d792ecd9d0503d6c73577147ece508d45600d8aac77e4cdfcf9"}, + {file = "torchvision-0.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b4283d283675556bb0eae31d29996f53861b17cbdcdf3509e6bc050414ac9289"}, + {file = "torchvision-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c4e4f5b24ea6b087b02ed492ab1e21bba3352c4577e2def14248cfc60732338"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9281d63ead929bb19143731154cd1d8bf0b5e9873dff8578a40e90a6bec3c6fa"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:4d10bc9083c4d5fadd7edd7b729700a7be48dab4f62278df3bc73fa48e48a155"}, + {file = "torchvision-0.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:ccf085ef1824fb9e16f1901285bf89c298c62dfd93267a39e8ee42c71255242f"}, + {file = "torchvision-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:731f434d91586769e255b5d70ed1a4457e0a1394a95f4aacf0e1e7e21f80c098"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:febe4f14d4afcb47cc861d8be7760ab6a123cd0817f97faf5771488cb6aa90f4"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e328309b8670a2e889b2fe76a1c2744a099c11c984da9a822357bd9debd699a5"}, + {file = "torchvision-0.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:6616f12e00a22e7f3fedbd0fccb0804c05e8fe22871668f10eae65cf3f283614"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" +torch = "2.4.1" + +[package.extras] +gdown = ["gdown (>=4.7.3)"] +scipy = ["scipy"] + [[package]] name = "tornado" version = "6.4.1" @@ -4239,33 +4292,6 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] -[[package]] -name = "triton" -version = "2.1.0" -description = "A language and compiler for custom Deep Learning operations" -optional = false -python-versions = "*" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"win32\" or sys_platform == \"linux\"" -files = [ - {file = "triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:66439923a30d5d48399b08a9eae10370f6c261a5ec864a64983bae63152d39d7"}, - {file = "triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:919b06453f0033ea52c13eaf7833de0e57db3178d23d4e04f9fc71c4f2c32bf8"}, - {file = "triton-2.1.0-0-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae4bb8a91de790e1866405211c4d618379781188f40d5c4c399766914e84cd94"}, - {file = "triton-2.1.0-0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39f6fb6bdccb3e98f3152e3fbea724f1aeae7d749412bbb1fa9c441d474eba26"}, - {file = "triton-2.1.0-0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21544e522c02005a626c8ad63d39bdff2f31d41069592919ef281e964ed26446"}, - {file = "triton-2.1.0-0-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:143582ca31dd89cd982bd3bf53666bab1c7527d41e185f9e3d8a3051ce1b663b"}, - {file = "triton-2.1.0-0-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82fc5aeeedf6e36be4e4530cbdcba81a09d65c18e02f52dc298696d45721f3bd"}, - {file = "triton-2.1.0-0-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:81a96d110a738ff63339fc892ded095b31bd0d205e3aace262af8400d40b6fa8"}, -] - -[package.dependencies] -filelock = "*" - -[package.extras] -build = ["cmake (>=3.18)", "lit"] -tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)"] -tutorials = ["matplotlib", "pandas", "tabulate"] - [[package]] name = "triton" version = "3.0.0" @@ -4273,7 +4299,7 @@ description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform != \"win32\" and sys_platform != \"linux\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"}, {file = "triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ce8520437c602fb633f1324cc3871c47bee3b67acf9756c1a66309b60e3216c"}, @@ -4573,4 +4599,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.12" -content-hash = "11dff0dae7badee83fcbe401167de506736348a975eebf3f9e3015f3b3c78584" +content-hash = "5803c6efb57eca486961794f233953c199047b36c5e823e7310488a6adc91fdd" diff --git a/pyproject.toml b/pyproject.toml index 8290789..58d7d35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,18 +60,18 @@ av = "*" # --------------------------------------------------------------------------- # --- NVIDIA GPU configuration --- -torch = [ - { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a", markers = "sys_platform == 'win32'" }, - { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c", markers = "sys_platform == 'linux'" } -] -torchaudio = [ - { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2", markers = "sys_platform == 'win32'" }, - { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4", markers = "sys_platform == 'linux'" } -] +# torch = [ + # { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=d99be44487d3ed0f7e6ef5d6689a37fb4a2f2821a9e7b59e7e04002a876a667a", markers = "sys_platform == 'win32'" }, + # { url = "https://download.pytorch.org/whl/cu118/torch-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=f3c0ba02b50d0021ff26f030e22d4c45965537cf91f322e52a65b8c58396f81c", markers = "sys_platform == 'linux'" } +# ] +# torchaudio = [ + # { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-win_amd64.whl#sha256=79b5afa556063be18de4a1964339242301fe04e782e1030a22695257dd9afbd2", markers = "sys_platform == 'win32'" }, + # { url = "https://download.pytorch.org/whl/cu118/torchaudio-2.1.1+cu118-cp311-cp311-linux_x86_64.whl#sha256=2b077639f240176bb27e964e2e9b3a5c2a8d560a3a7bc1ffd0a024e81f2e10b4", markers = "sys_platform == 'linux'" } +# ] # --- AMD GPU configuration --- -# torch = "2.4.1" -# torchaudio = "2.4.1" -# torch-directml = "^0.2.5.dev240914" +torch = "2.4.1" +torchaudio = "2.4.1" +torch-directml = "^0.2.5.dev240914" # --------------------------------------------------------------------------- [tool.poetry.group.dev.dependencies] diff --git a/run.sh b/run.sh deleted file mode 100755 index f239307..0000000 --- a/run.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/sh - -if [ "$(uname)" = "Darwin" ]; then - # macOS specific env: - export PYTORCH_ENABLE_MPS_FALLBACK=1 - export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 -elif [ "$(uname)" != "Linux" ]; then - echo "Unsupported operating system." - exit 1 -fi - -if [ -d ".venv" ]; then - echo "Activate venv..." - . .venv/bin/activate -else - echo "Create venv..." - requirements_file="requirements.txt" - - # Check if Python 3.8 is installed - if ! command -v python3.8 >/dev/null 2>&1 || pyenv versions --bare | grep -q "3.8"; then - echo "Python 3 not found. Attempting to install 3.8..." - if [ "$(uname)" = "Darwin" ] && command -v brew >/dev/null 2>&1; then - brew install python@3.8 - elif [ "$(uname)" = "Linux" ] && command -v apt-get >/dev/null 2>&1; then - sudo apt-get update - sudo apt-get install python3.8 - else - echo "Please install Python 3.8 manually." - exit 1 - fi - fi - - python3.8 -m venv .venv - . .venv/bin/activate - - # Check if required packages are installed and install them if not - if [ -f "${requirements_file}" ]; then - installed_packages=$(python3.8 -m pip freeze) - while IFS= read -r package; do - expr "${package}" : "^#.*" > /dev/null && continue - package_name=$(echo "${package}" | sed 's/[<>=!].*//') - if ! echo "${installed_packages}" | grep -q "${package_name}"; then - echo "${package_name} not found. Attempting to install..." - python3.8 -m pip install --upgrade "${package}" - fi - done < "${requirements_file}" - else - echo "${requirements_file} not found. Please ensure the requirements file with required packages exists." - exit 1 - fi -fi - -# Download models -chmod +x tools/dlmodels.sh -./tools/dlmodels.sh - -if [ $? -ne 0 ]; then - exit 1 -fi - -# Run the main script -python3.8 infer-web.py --pycmd python3.8 diff --git a/tools/dlmodels.bat b/tools/dlmodels.bat deleted file mode 100644 index b83825a..0000000 --- a/tools/dlmodels.bat +++ /dev/null @@ -1,362 +0,0 @@ -@echo off && chcp 65001 - -echo working dir is %cd% -echo downloading requirement aria2 check. -echo= -dir /a:d/b | findstr "aria2" > flag.txt -findstr "aria2" flag.txt >nul -if %errorlevel% ==0 ( - echo aria2 checked. - echo= -) else ( - echo failed. please downloading aria2 from webpage! - echo unzip it and put in this directory! - timeout /T 5 - start https://github.com/aria2/aria2/releases/tag/release-1.36.0 - echo= - goto end -) - -echo envfiles checking start. -echo= - -for /f %%x in ('findstr /i /c:"aria2" "flag.txt"') do (set aria2=%%x)&goto endSch -:endSch - -set d32=f0D32k.pth -set d40=f0D40k.pth -set d48=f0D48k.pth -set g32=f0G32k.pth -set g40=f0G40k.pth -set g48=f0G48k.pth - -set d40v2=f0D40k.pth -set g40v2=f0G40k.pth - -set dld32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -set dld40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -set dld48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -set dlg32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -set dlg40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -set dlg48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth - -set dld40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -set dlg40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth - -set hp2_all=HP2_all_vocals.pth -set hp3_all=HP3_all_vocals.pth -set hp5_only=HP5_only_main_vocal.pth -set VR_DeEchoAggressive=VR-DeEchoAggressive.pth -set VR_DeEchoDeReverb=VR-DeEchoDeReverb.pth -set VR_DeEchoNormal=VR-DeEchoNormal.pth -set onnx_dereverb=vocals.onnx - -set dlhp2_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth -set dlhp3_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth -set dlhp5_only=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth -set dlVR_DeEchoAggressive=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth -set dlVR_DeEchoDeReverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth -set dlVR_DeEchoNormal=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth -set dlonnx_dereverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx - -set hb=hubert_base.pt - -set dlhb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt - -set rmvpe=rmvpe.pt -set dlrmvpe=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt - -echo dir check start. -echo= - -if exist "%~dp0assets\pretrained" ( - echo dir .\assets\pretrained checked. - ) else ( - echo failed. generating dir .\assets\pretrained. - mkdir pretrained - ) -if exist "%~dp0assets\pretrained_v2" ( - echo dir .\assets\pretrained_v2 checked. - ) else ( - echo failed. generating dir .\assets\pretrained_v2. - mkdir pretrained_v2 - ) -if exist "%~dp0assets\uvr5_weights" ( - echo dir .\assets\uvr5_weights checked. - ) else ( - echo failed. generating dir .\assets\uvr5_weights. - mkdir uvr5_weights - ) -if exist "%~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy" ( - echo dir .\assets\uvr5_weights\onnx_dereverb_By_FoxJoy checked. - ) else ( - echo failed. generating dir .\assets\uvr5_weights\onnx_dereverb_By_FoxJoy. - mkdir uvr5_weights\onnx_dereverb_By_FoxJoy - ) - -echo= -echo dir check finished. - -echo= -echo required files check start. - -echo checking D32k.pth -if exist "%~dp0assets\pretrained\D32k.pth" ( - echo D32k.pth in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d %~dp0assets\pretrained -o D32k.pth - if exist "%~dp0assets\pretrained\D32k.pth" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking D40k.pth -if exist "%~dp0assets\pretrained\D40k.pth" ( - echo D40k.pth in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d %~dp0assets\pretrained -o D40k.pth - if exist "%~dp0assets\pretrained\D40k.pth" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking D40k.pth -if exist "%~dp0assets\pretrained_v2\D40k.pth" ( - echo D40k.pth in .\assets\pretrained_v2 checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d %~dp0assets\pretrained_v2 -o D40k.pth - if exist "%~dp0assets\pretrained_v2\D40k.pth" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking D48k.pth -if exist "%~dp0assets\pretrained\D48k.pth" ( - echo D48k.pth in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d %~dp0assets\pretrained -o D48k.pth - if exist "%~dp0assets\pretrained\D48k.pth" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking G32k.pth -if exist "%~dp0assets\pretrained\G32k.pth" ( - echo G32k.pth in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d %~dp0assets\pretrained -o G32k.pth - if exist "%~dp0assets\pretrained\G32k.pth" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking G40k.pth -if exist "%~dp0assets\pretrained\G40k.pth" ( - echo G40k.pth in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d %~dp0assets\pretrained -o G40k.pth - if exist "%~dp0assets\pretrained\G40k.pth" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking G40k.pth -if exist "%~dp0assets\pretrained_v2\G40k.pth" ( - echo G40k.pth in .\assets\pretrained_v2 checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d %~dp0assets\pretrained_v2 -o G40k.pth - if exist "%~dp0assets\pretrained_v2\G40k.pth" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking G48k.pth -if exist "%~dp0assets\pretrained\G48k.pth" ( - echo G48k.pth in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d %~dp0assets\pretrained -o G48k.pth - if exist "%~dp0assets\pretrained\G48k.pth" (echo download successful.) else (echo please try again! - echo=) - ) - -echo checking %d32% -if exist "%~dp0assets\pretrained\%d32%" ( - echo %d32% in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld32% -d %~dp0assets\pretrained -o %d32% - if exist "%~dp0assets\pretrained\%d32%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %d40% -if exist "%~dp0assets\pretrained\%d40%" ( - echo %d40% in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40% -d %~dp0assets\pretrained -o %d40% - if exist "%~dp0assets\pretrained\%d40%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %d40v2% -if exist "%~dp0assets\pretrained_v2\%d40v2%" ( - echo %d40v2% in .\assets\pretrained_v2 checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40v2% -d %~dp0assets\pretrained_v2 -o %d40v2% - if exist "%~dp0assets\pretrained_v2\%d40v2%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %d48% -if exist "%~dp0assets\pretrained\%d48%" ( - echo %d48% in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld48% -d %~dp0assets\pretrained -o %d48% - if exist "%~dp0assets\pretrained\%d48%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %g32% -if exist "%~dp0assets\pretrained\%g32%" ( - echo %g32% in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg32% -d %~dp0assets\pretrained -o %g32% - if exist "%~dp0assets\pretrained\%g32%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %g40% -if exist "%~dp0assets\pretrained\%g40%" ( - echo %g40% in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40% -d %~dp0assets\pretrained -o %g40% - if exist "%~dp0assets\pretrained\%g40%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %g40v2% -if exist "%~dp0assets\pretrained_v2\%g40v2%" ( - echo %g40v2% in .\assets\pretrained_v2 checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40v2% -d %~dp0assets\pretrained_v2 -o %g40v2% - if exist "%~dp0assets\pretrained_v2\%g40v2%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %g48% -if exist "%~dp0assets\pretrained\%g48%" ( - echo %g48% in .\assets\pretrained checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg48% -d %~dp0assets\pretrained -o %g48% - if exist "%~dp0assets\pretrained\%g48%" (echo download successful.) else (echo please try again! - echo=) - ) - -echo checking %hp2_all% -if exist "%~dp0assets\uvr5_weights\%hp2_all%" ( - echo %hp2_all% in .\assets\uvr5_weights checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp2_all% -d %~dp0assets\uvr5_weights -o %hp2_all% - if exist "%~dp0assets\uvr5_weights\%hp2_all%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %hp3_all% -if exist "%~dp0assets\uvr5_weights\%hp3_all%" ( - echo %hp3_all% in .\assets\uvr5_weights checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp3_all% -d %~dp0assets\uvr5_weights -o %hp3_all% - if exist "%~dp0assets\uvr5_weights\%hp3_all%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %hp5_only% -if exist "%~dp0assets\uvr5_weights\%hp5_only%" ( - echo %hp5_only% in .\assets\uvr5_weights checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp5_only% -d %~dp0assets\uvr5_weights -o %hp5_only% - if exist "%~dp0assets\uvr5_weights\%hp5_only%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %VR_DeEchoAggressive% -if exist "%~dp0assets\uvr5_weights\%VR_DeEchoAggressive%" ( - echo %VR_DeEchoAggressive% in .\assets\uvr5_weights checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoAggressive% -d %~dp0assets\uvr5_weights -o %VR_DeEchoAggressive% - if exist "%~dp0assets\uvr5_weights\%VR_DeEchoAggressive%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %VR_DeEchoDeReverb% -if exist "%~dp0assets\uvr5_weights\%VR_DeEchoDeReverb%" ( - echo %VR_DeEchoDeReverb% in .\assets\uvr5_weights checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoDeReverb% -d %~dp0assets\uvr5_weights -o %VR_DeEchoDeReverb% - if exist "%~dp0assets\uvr5_weights\%VR_DeEchoDeReverb%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %VR_DeEchoNormal% -if exist "%~dp0assets\uvr5_weights\%VR_DeEchoNormal%" ( - echo %VR_DeEchoNormal% in .\assets\uvr5_weights checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoNormal% -d %~dp0assets\uvr5_weights -o %VR_DeEchoNormal% - if exist "%~dp0assets\uvr5_weights\%VR_DeEchoNormal%" (echo download successful.) else (echo please try again! - echo=) - ) -echo checking %onnx_dereverb% -if exist "%~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" ( - echo %onnx_dereverb% in .\assets\uvr5_weights\onnx_dereverb_By_FoxJoy checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlonnx_dereverb% -d %~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy -o %onnx_dereverb% - if exist "%~dp0assets\uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" (echo download successful.) else (echo please try again! - echo=) - ) - -echo checking %hb% -if exist "%~dp0assets\hubert\%hb%" ( - echo %hb% in .\assets\hubert checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhb% -d %~dp0assets\hubert\ -o %hb% - if exist "%~dp0assets\hubert\%hb%" (echo download successful.) else (echo please try again! - echo=) - ) - -echo checking %rmvpe% -if exist "%~dp0assets\rmvpe\%rmvpe%" ( - echo %rmvpe% in .\assets\rmvpe checked. - echo= - ) else ( - echo failed. starting download from huggingface. - %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlrmvpe% -d %~dp0assets\rmvpe\ -o %rmvpe% - if exist "%~dp0assets\rmvpe\%rmvpe%" (echo download successful.) else (echo please try again! - echo=) - ) - -echo required files check finished. -echo envfiles check complete. -pause -:end -del flag.txt diff --git a/tools/dlmodels.sh b/tools/dlmodels.sh deleted file mode 100755 index 9482db4..0000000 --- a/tools/dlmodels.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/sh - -printf "working dir is %s\n" "$PWD" -echo "downloading requirement aria2 check." - -if command -v aria2c > /dev/null 2>&1 -then - echo "aria2 command found" -else - echo "failed. please install aria2" - exit 1 -fi - -echo "dir check start." - -check_dir() { - [ -d "$1" ] && printf "dir %s checked\n" "$1" || \ - printf "failed. generating dir %s\n" "$1" && mkdir -p "$1" -} - -check_dir "./assets/pretrained" -check_dir "./assets/pretrained_v2" -check_dir "./assets/uvr5_weights" -check_dir "./assets/uvr5_weights/onnx_dereverb_By_FoxJoy" - -echo "dir check finished." - -echo "required files check start." -check_file_pretrained() { - printf "checking %s\n" "$2" - if [ -f "./assets/""$1""/""$2""" ]; then - printf "%s in ./assets/%s checked.\n" "$2" "$1" - else - echo failed. starting download from huggingface. - if command -v aria2c > /dev/null 2>&1; then - aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"$1"/"$2" -d ./assets/"$1" -o "$2" - [ -f "./assets/""$1""/""$2""" ] && echo "download successful." || { echo "please try again!" && exit 1; } - else - echo "aria2c command not found. Please install aria2c and try again." - exit 1 - fi - fi -} - -check_file_special() { - printf "checking %s\n" "$2" - if [ -f "./assets/""$1""/""$2""" ]; then - printf "%s in ./assets/%s checked.\n" "$2" "$1" - else - echo failed. starting download from huggingface. - if command -v aria2c > /dev/null 2>&1; then - aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"$2" -d ./assets/"$1" -o "$2" - [ -f "./assets/""$1""/""$2""" ] && echo "download successful." || { echo "please try again!" && exit 1; } - else - echo "aria2c command not found. Please install aria2c and try again." - exit 1 - fi - fi -} - -check_file_pretrained pretrained D32k.pth -check_file_pretrained pretrained D40k.pth -check_file_pretrained pretrained D48k.pth -check_file_pretrained pretrained G32k.pth -check_file_pretrained pretrained G40k.pth -check_file_pretrained pretrained G48k.pth -check_file_pretrained pretrained_v2 f0D40k.pth -check_file_pretrained pretrained_v2 f0G40k.pth -check_file_pretrained pretrained_v2 D40k.pth -check_file_pretrained pretrained_v2 G40k.pth -check_file_pretrained uvr5_weights HP2_all_vocals.pth -check_file_pretrained uvr5_weights HP3_all_vocals.pth -check_file_pretrained uvr5_weights HP5_only_main_vocal.pth -check_file_pretrained uvr5_weights VR-DeEchoAggressive.pth -check_file_pretrained uvr5_weights VR-DeEchoDeReverb.pth -check_file_pretrained uvr5_weights VR-DeEchoNormal.pth -check_file_pretrained uvr5_weights "onnx_dereverb_By_FoxJoy/vocals.onnx" -check_file_special rmvpe rmvpe.pt -check_file_special hubert hubert_base.pt - -echo "required files check finished." diff --git a/tools/rvc_for_realtime.py b/tools/rvc_for_realtime.py deleted file mode 100644 index 9a7399c..0000000 --- a/tools/rvc_for_realtime.py +++ /dev/null @@ -1,445 +0,0 @@ -from io import BytesIO -import os -import pickle -import sys -import traceback -from infer.lib import jit -from infer.lib.jit.get_synthesizer import get_synthesizer -from time import time as ttime -import fairseq -import faiss -import numpy as np -import parselmouth -import pyworld -import scipy.signal as signal -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchcrepe - -from infer.lib.infer_pack.models import ( - SynthesizerTrnMs256NSFsid, - SynthesizerTrnMs256NSFsid_nono, - SynthesizerTrnMs768NSFsid, - SynthesizerTrnMs768NSFsid_nono, -) - -now_dir = os.getcwd() -sys.path.append(now_dir) -from multiprocessing import Manager as M - -from configs.config import Config - -# config = Config() - -mm = M() - - -def printt(strr, *args): - if len(args) == 0: - print(strr) - else: - print(strr % args) - - -# config.device=torch.device("cpu")########强制cpu测试 -# config.is_half=False########强制cpu测试 -class RVC: - def __init__( - self, - key, - pth_path, - index_path, - index_rate, - n_cpu, - inp_q, - opt_q, - config: Config, - last_rvc=None, - ) -> None: - """ - 初始化 - """ - try: - if config.dml == True: - - def forward_dml(ctx, x, scale): - ctx.scale = scale - res = x.clone().detach() - return res - - fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml - # global config - self.config = config - self.inp_q = inp_q - self.opt_q = opt_q - # device="cpu"########强制cpu测试 - self.device = config.device - self.f0_up_key = key - self.f0_min = 50 - self.f0_max = 1100 - self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) - self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) - self.n_cpu = n_cpu - self.use_jit = self.config.use_jit - self.is_half = config.is_half - - if index_rate != 0: - self.index = faiss.read_index(index_path) - self.big_npy = self.index.reconstruct_n(0, self.index.ntotal) - printt("Index search enabled") - self.pth_path: str = pth_path - self.index_path = index_path - self.index_rate = index_rate - self.cache_pitch: torch.Tensor = torch.zeros( - 1024, device=self.device, dtype=torch.long - ) - self.cache_pitchf = torch.zeros( - 1024, device=self.device, dtype=torch.float32 - ) - - if last_rvc is None: - models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task( - ["assets/hubert/hubert_base.pt"], - suffix="", - ) - hubert_model = models[0] - hubert_model = hubert_model.to(self.device) - if self.is_half: - hubert_model = hubert_model.half() - else: - hubert_model = hubert_model.float() - hubert_model.eval() - self.model = hubert_model - else: - self.model = last_rvc.model - - self.net_g: nn.Module = None - - def set_default_model(): - self.net_g, cpt = get_synthesizer(self.pth_path, self.device) - self.tgt_sr = cpt["config"][-1] - cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] - self.if_f0 = cpt.get("f0", 1) - self.version = cpt.get("version", "v1") - if self.is_half: - self.net_g = self.net_g.half() - else: - self.net_g = self.net_g.float() - - def set_jit_model(): - jit_pth_path = self.pth_path.rstrip(".pth") - jit_pth_path += ".half.jit" if self.is_half else ".jit" - reload = False - if str(self.device) == "cuda": - self.device = torch.device("cuda:0") - if os.path.exists(jit_pth_path): - cpt = jit.load(jit_pth_path) - model_device = cpt["device"] - if model_device != str(self.device): - reload = True - else: - reload = True - - if reload: - cpt = jit.synthesizer_jit_export( - self.pth_path, - "script", - None, - device=self.device, - is_half=self.is_half, - ) - - self.tgt_sr = cpt["config"][-1] - self.if_f0 = cpt.get("f0", 1) - self.version = cpt.get("version", "v1") - self.net_g = torch.jit.load( - BytesIO(cpt["model"]), map_location=self.device - ) - self.net_g.infer = self.net_g.forward - self.net_g.eval().to(self.device) - - def set_synthesizer(): - if self.use_jit and not config.dml: - if self.is_half and "cpu" in str(self.device): - printt( - "Use default Synthesizer model. \ - Jit is not supported on the CPU for half floating point" - ) - set_default_model() - else: - set_jit_model() - else: - set_default_model() - - if last_rvc is None or last_rvc.pth_path != self.pth_path: - set_synthesizer() - else: - self.tgt_sr = last_rvc.tgt_sr - self.if_f0 = last_rvc.if_f0 - self.version = last_rvc.version - self.is_half = last_rvc.is_half - if last_rvc.use_jit != self.use_jit: - set_synthesizer() - else: - self.net_g = last_rvc.net_g - - if last_rvc is not None and hasattr(last_rvc, "model_rmvpe"): - self.model_rmvpe = last_rvc.model_rmvpe - if last_rvc is not None and hasattr(last_rvc, "model_fcpe"): - self.device_fcpe = last_rvc.device_fcpe - self.model_fcpe = last_rvc.model_fcpe - except: - printt(traceback.format_exc()) - - def change_key(self, new_key): - self.f0_up_key = new_key - - def change_index_rate(self, new_index_rate): - if new_index_rate != 0 and self.index_rate == 0: - self.index = faiss.read_index(self.index_path) - self.big_npy = self.index.reconstruct_n(0, self.index.ntotal) - printt("Index search enabled") - self.index_rate = new_index_rate - - def get_f0_post(self, f0): - if not torch.is_tensor(f0): - f0 = torch.from_numpy(f0) - f0 = f0.float().to(self.device).squeeze() - f0_mel = 1127 * torch.log(1 + f0 / 700) - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / ( - self.f0_mel_max - self.f0_mel_min - ) + 1 - f0_mel[f0_mel <= 1] = 1 - f0_mel[f0_mel > 255] = 255 - f0_coarse = torch.round(f0_mel).long() - return f0_coarse, f0 - - def get_f0(self, x, f0_up_key, n_cpu, method="harvest"): - n_cpu = int(n_cpu) - if method == "crepe": - return self.get_f0_crepe(x, f0_up_key) - if method == "rmvpe": - return self.get_f0_rmvpe(x, f0_up_key) - if method == "fcpe": - return self.get_f0_fcpe(x, f0_up_key) - x = x.cpu().numpy() - if method == "pm": - p_len = x.shape[0] // 160 + 1 - f0_min = 65 - l_pad = int(np.ceil(1.5 / f0_min * 16000)) - r_pad = l_pad + 1 - s = parselmouth.Sound(np.pad(x, (l_pad, r_pad)), 16000).to_pitch_ac( - time_step=0.01, - voicing_threshold=0.6, - pitch_floor=f0_min, - pitch_ceiling=1100, - ) - assert np.abs(s.t1 - 1.5 / f0_min) < 0.001 - f0 = s.selected_array["frequency"] - if len(f0) < p_len: - f0 = np.pad(f0, (0, p_len - len(f0))) - f0 = f0[:p_len] - f0 *= pow(2, f0_up_key / 12) - return self.get_f0_post(f0) - if n_cpu == 1: - f0, t = pyworld.harvest( - x.astype(np.double), - fs=16000, - f0_ceil=1100, - f0_floor=50, - frame_period=10, - ) - f0 = signal.medfilt(f0, 3) - f0 *= pow(2, f0_up_key / 12) - return self.get_f0_post(f0) - f0bak = np.zeros(x.shape[0] // 160 + 1, dtype=np.float64) - length = len(x) - part_length = 160 * ((length // 160 - 1) // n_cpu + 1) - n_cpu = (length // 160 - 1) // (part_length // 160) + 1 - ts = ttime() - res_f0 = mm.dict() - for idx in range(n_cpu): - tail = part_length * (idx + 1) + 320 - if idx == 0: - self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts)) - else: - self.inp_q.put( - (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts) - ) - while 1: - res_ts = self.opt_q.get() - if res_ts == ts: - break - f0s = [i[1] for i in sorted(res_f0.items(), key=lambda x: x[0])] - for idx, f0 in enumerate(f0s): - if idx == 0: - f0 = f0[:-3] - elif idx != n_cpu - 1: - f0 = f0[2:-3] - else: - f0 = f0[2:] - f0bak[part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]] = ( - f0 - ) - f0bak = signal.medfilt(f0bak, 3) - f0bak *= pow(2, f0_up_key / 12) - return self.get_f0_post(f0bak) - - def get_f0_crepe(self, x, f0_up_key): - if "privateuseone" in str( - self.device - ): ###不支持dml,cpu又太慢用不成,拿fcpe顶替 - return self.get_f0(x, f0_up_key, 1, "fcpe") - # printt("using crepe,device:%s"%self.device) - f0, pd = torchcrepe.predict( - x.unsqueeze(0).float(), - 16000, - 160, - self.f0_min, - self.f0_max, - "full", - batch_size=512, - # device=self.device if self.device.type!="privateuseone" else "cpu",###crepe不用半精度全部是全精度所以不愁###cpu延迟高到没法用 - device=self.device, - return_periodicity=True, - ) - pd = torchcrepe.filter.median(pd, 3) - f0 = torchcrepe.filter.mean(f0, 3) - f0[pd < 0.1] = 0 - f0 *= pow(2, f0_up_key / 12) - return self.get_f0_post(f0) - - def get_f0_rmvpe(self, x, f0_up_key): - if hasattr(self, "model_rmvpe") == False: - from infer.lib.rmvpe import RMVPE - - printt("Loading rmvpe model") - self.model_rmvpe = RMVPE( - "assets/rmvpe/rmvpe.pt", - is_half=self.is_half, - device=self.device, - use_jit=self.config.use_jit, - ) - f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) - f0 *= pow(2, f0_up_key / 12) - return self.get_f0_post(f0) - - def get_f0_fcpe(self, x, f0_up_key): - if hasattr(self, "model_fcpe") == False: - from torchfcpe import spawn_bundled_infer_model - - printt("Loading fcpe model") - if "privateuseone" in str(self.device): - self.device_fcpe = "cpu" - else: - self.device_fcpe = self.device - self.model_fcpe = spawn_bundled_infer_model(self.device_fcpe) - f0 = self.model_fcpe.infer( - x.to(self.device_fcpe).unsqueeze(0).float(), - sr=16000, - decoder_mode="local_argmax", - threshold=0.006, - ) - f0 *= pow(2, f0_up_key / 12) - return self.get_f0_post(f0) - - def infer( - self, - input_wav: torch.Tensor, - block_frame_16k, - skip_head, - return_length, - f0method, - ) -> np.ndarray: - t1 = ttime() - with torch.no_grad(): - if self.config.is_half: - feats = input_wav.half().view(1, -1) - else: - feats = input_wav.float().view(1, -1) - padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) - inputs = { - "source": feats, - "padding_mask": padding_mask, - "output_layer": 9 if self.version == "v1" else 12, - } - logits = self.model.extract_features(**inputs) - feats = ( - self.model.final_proj(logits[0]) if self.version == "v1" else logits[0] - ) - feats = torch.cat((feats, feats[:, -1:, :]), 1) - t2 = ttime() - try: - if hasattr(self, "index") and self.index_rate != 0: - npy = feats[0][skip_head // 2 :].cpu().numpy().astype("float32") - score, ix = self.index.search(npy, k=8) - if (ix >= 0).all(): - weight = np.square(1 / score) - weight /= weight.sum(axis=1, keepdims=True) - npy = np.sum( - self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1 - ) - if self.config.is_half: - npy = npy.astype("float16") - feats[0][skip_head // 2 :] = ( - torch.from_numpy(npy).unsqueeze(0).to(self.device) - * self.index_rate - + (1 - self.index_rate) * feats[0][skip_head // 2 :] - ) - else: - printt( - "Invalid index. You MUST use added_xxxx.index but not trained_xxxx.index!" - ) - else: - printt("Index search FAILED or disabled") - except: - traceback.print_exc() - printt("Index search FAILED") - t3 = ttime() - p_len = input_wav.shape[0] // 160 - if self.if_f0 == 1: - f0_extractor_frame = block_frame_16k + 800 - if f0method == "rmvpe": - f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160 - pitch, pitchf = self.get_f0( - input_wav[-f0_extractor_frame:], self.f0_up_key, self.n_cpu, f0method - ) - shift = block_frame_16k // 160 - self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone() - self.cache_pitchf[:-shift] = self.cache_pitchf[shift:].clone() - self.cache_pitch[4 - pitch.shape[0] :] = pitch[3:-1] - self.cache_pitchf[4 - pitch.shape[0] :] = pitchf[3:-1] - cache_pitch = self.cache_pitch[None, -p_len:] - cache_pitchf = self.cache_pitchf[None, -p_len:] - t4 = ttime() - feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) - feats = feats[:, :p_len, :] - p_len = torch.LongTensor([p_len]).to(self.device) - sid = torch.LongTensor([0]).to(self.device) - skip_head = torch.LongTensor([skip_head]) - return_length = torch.LongTensor([return_length]) - with torch.no_grad(): - if self.if_f0 == 1: - infered_audio, _, _ = self.net_g.infer( - feats, - p_len, - cache_pitch, - cache_pitchf, - sid, - skip_head, - return_length, - ) - else: - infered_audio, _, _ = self.net_g.infer( - feats, p_len, sid, skip_head, return_length - ) - t5 = ttime() - printt( - "Spent time: fea = %.3fs, index = %.3fs, f0 = %.3fs, model = %.3fs", - t2 - t1, - t3 - t2, - t4 - t3, - t5 - t4, - ) - return infered_audio.squeeze().float() diff --git a/venv.sh b/venv.sh deleted file mode 100755 index 577283b..0000000 --- a/venv.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -python3.8 -m venv .venv