Retrieval-based-Voice-Conve.../Retrieval_based_Voice_Conversion_WebUI_v2.ipynb
Zzzyt c2ab680e18
add is_half argument to extract_feature_print.py (#1683)
* for GTX1650

* testing torch-profiler

* no more profiler & change default audio

* longer slice

* fluid container

* cache rmvpe and ui tweaks

* get my changes back after merge

* format code

* only load rmvpe when necessary

* fix rmvpe & config bug

* fix is_half again

* manual sync with upstream

* revert other changes for pull request
2024-01-11 22:40:34 +09:00

423 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) Training notebook"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "ZFFCx5J80SGa"
},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GmFP6bN9dvOq"
},
"outputs": [],
"source": [
"# @title #查看显卡\n",
"!nvidia-smi"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "jwu07JgqoFON"
},
"outputs": [],
"source": [
"# @title 挂载谷歌云盘\n",
"\n",
"from google.colab import drive\n",
"\n",
"drive.mount(\"/content/drive\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "wjddIFr1oS3W"
},
"outputs": [],
"source": [
"# @title #安装依赖\n",
"!apt-get -y install build-essential python3-dev ffmpeg\n",
"!pip3 install --upgrade setuptools wheel\n",
"!pip3 install --upgrade pip\n",
"!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ge_97mfpgqTm"
},
"outputs": [],
"source": [
"# @title #克隆仓库\n",
"\n",
"# Absolute path plus -p: the directory matches the %cd below from any working\n",
"# directory, and re-running the cell does not fail on an existing directory.\n",
"!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI\n",
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
"!git init\n",
"# On a re-run the remote already exists, so fall back to updating its URL.\n",
"!git remote add origin https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git || git remote set-url origin https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git\n",
"# Fetch only the pinned commit (shallow) and check it out.\n",
"!git fetch origin cfd984812804ddc9247d65b14c82cd32e56c1133 --depth=1\n",
"!git reset --hard FETCH_HEAD"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BLDEZADkvlw1"
},
"outputs": [],
"source": [
"# @title #更新仓库(一般无需执行)\n",
"# The clone cell fetches a single commit and never configures an upstream\n",
"# branch, so a bare `git pull` has nothing to pull from; name the remote and\n",
"# branch explicitly.\n",
"!git pull origin main"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pqE0PrnuRqI2"
},
"outputs": [],
"source": [
"# @title #安装aria2\n",
"!apt -y install -qq aria2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UG3XpUwEomUz"
},
"outputs": [],
"source": [
"# @title 下载底模\n",
"\n",
"# Every pretrained checkpoint is fetched with the same aria2c options, so the\n",
"# file names are listed once and downloaded in a loop. Comment or uncomment a\n",
"# list entry to skip or fetch that checkpoint.\n",
"HF_BASE = \"https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main\"\n",
"RVC_DIR = \"/content/Retrieval-based-Voice-Conversion-WebUI\"\n",
"\n",
"# v1\n",
"PRETRAINED_V1 = [\n",
"    \"D32k.pth\",\n",
"    \"D40k.pth\",\n",
"    \"D48k.pth\",\n",
"    \"G32k.pth\",\n",
"    \"G40k.pth\",\n",
"    \"G48k.pth\",\n",
"    \"f0D32k.pth\",\n",
"    \"f0D40k.pth\",\n",
"    \"f0D48k.pth\",\n",
"    \"f0G32k.pth\",\n",
"    \"f0G40k.pth\",\n",
"    \"f0G48k.pth\",\n",
"]\n",
"\n",
"# v2 (only the 40k checkpoints are enabled by default)\n",
"PRETRAINED_V2 = [\n",
"    # \"D32k.pth\",\n",
"    \"D40k.pth\",\n",
"    # \"D48k.pth\",\n",
"    # \"G32k.pth\",\n",
"    \"G40k.pth\",\n",
"    # \"G48k.pth\",\n",
"    # \"f0D32k.pth\",\n",
"    \"f0D40k.pth\",\n",
"    # \"f0D48k.pth\",\n",
"    # \"f0G32k.pth\",\n",
"    \"f0G40k.pth\",\n",
"    # \"f0G48k.pth\",\n",
"]\n",
"\n",
"# The remote sub-directory and the local target directory share the same name.\n",
"for subdir, names in ((\"pretrained\", PRETRAINED_V1), (\"pretrained_v2\", PRETRAINED_V2)):\n",
"    for name in names:\n",
"        !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M {HF_BASE}/{subdir}/{name} -d {RVC_DIR}/{subdir} -o {name}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "HugjmZqZRuiF"
},
"outputs": [],
"source": [
"# @title #下载人声分离模型\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2RCaT9FTR0ej"
},
"outputs": [],
"source": [
"# @title #下载hubert_base\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @title #下载rmvpe模型\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o rmvpe.pt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Mwk7Q0Loqzjx"
},
"outputs": [],
"source": [
"# @title #从谷歌云盘加载打包好的数据集到/content/dataset\n",
"\n",
"# @markdown 数据集位置\n",
"DATASET = (\n",
" \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" # @param {type:\"string\"}\n",
")\n",
"\n",
"!mkdir -p /content/dataset\n",
"# Quote the interpolated path so an archive name containing spaces reaches\n",
"# unzip as a single argument.\n",
"!unzip -d /content/dataset -B \"{DATASET}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PDlFxWHWEynD"
},
"outputs": [],
"source": [
"# @title #重命名数据集中的重名文件\n",
"!ls -a /content/dataset/\n",
"!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7vh6vphDwO0b"
},
"outputs": [],
"source": [
"# @title #启动webui\n",
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
"# %load_ext tensorboard\n",
"# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
"!python3 infer-web.py --colab --pycmd python3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FgJuNeAwx5Y_"
},
"outputs": [],
"source": [
"# @title #手动将训练后的模型文件备份到谷歌云盘\n",
"# @markdown #需要自己查看logs文件夹下模型的文件名手动修改下方命令末尾的文件名\n",
"\n",
"# @markdown #模型名\n",
"MODELNAME = \"lulu\" # @param {type:\"string\"}\n",
"# @markdown #模型epoch\n",
"MODELEPOCH = 9600 # @param {type:\"integer\"}\n",
"\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
"\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "OVQoLQJXS7WX"
},
"outputs": [],
"source": [
"# @title 从谷歌云盘恢复pth\n",
"# @markdown 需要自己查看logs文件夹下模型的文件名手动修改下方命令末尾的文件名\n",
"\n",
"# @markdown 模型名\n",
"MODELNAME = \"lulu\" # @param {type:\"string\"}\n",
"# @markdown 模型epoch\n",
"MODELEPOCH = 7500 # @param {type:\"integer\"}\n",
"\n",
"!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
"\n",
"!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
"!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
"!cp /content/drive/MyDrive/*.index /content/\n",
"!cp /content/drive/MyDrive/*.npy /content/\n",
"!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZKAyuKb9J6dz"
},
"outputs": [],
"source": [
"# @title 手动预处理(不推荐)\n",
"# @markdown 模型名\n",
"MODELNAME = \"lulu\" # @param {type:\"string\"}\n",
"# @markdown 采样率\n",
"BITRATE = 48000 # @param {type:\"integer\"}\n",
"# @markdown 使用的进程数\n",
"THREADCOUNT = 8 # @param {type:\"integer\"}\n",
"\n",
"!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "CrxJqzAUKmPJ"
},
"outputs": [],
"source": [
"# @title 手动提取特征(不推荐)\n",
"# @markdown 模型名\n",
"MODELNAME = \"lulu\" # @param {type:\"string\"}\n",
"# @markdown 使用的进程数\n",
"THREADCOUNT = 8 # @param {type:\"integer\"}\n",
"# @markdown 音高提取算法\n",
"ALGO = \"harvest\" # @param {type:\"string\"}\n",
"\n",
"!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n",
"\n",
"!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "IMLPLKOaKj58"
},
"outputs": [],
"source": [
"# @title 手动训练(不推荐)\n",
"# @markdown 模型名\n",
"MODELNAME = \"lulu\" # @param {type:\"string\"}\n",
"# @markdown 使用的GPU\n",
"USEGPU = \"0\" # @param {type:\"string\"}\n",
"# @markdown 批大小\n",
"BATCHSIZE = 32 # @param {type:\"integer\"}\n",
"# @markdown 停止的epoch\n",
"MODELEPOCH = 3200 # @param {type:\"integer\"}\n",
"# @markdown 保存epoch间隔\n",
"EPOCHSAVE = 100 # @param {type:\"integer\"}\n",
"# @markdown 采样率\n",
"MODELSAMPLE = \"48k\" # @param {type:\"string\"}\n",
"# @markdown 是否缓存训练集\n",
"CACHEDATA = 1 # @param {type:\"integer\"}\n",
"# @markdown 是否仅保存最新的ckpt文件\n",
"ONLYLATEST = 0 # @param {type:\"integer\"}\n",
"\n",
"# Pass the experiment name from the MODELNAME form field instead of a\n",
"# hard-coded value, so the field above actually selects the experiment.\n",
"!python3 train_nsf_sim_cache_sid_load_pretrain.py -e {MODELNAME} -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "haYA81hySuDl"
},
"outputs": [],
"source": [
"# @title 删除其它pth只留选中的慎点仔细看代码\n",
"# @markdown 模型名\n",
"MODELNAME = \"lulu\" # @param {type:\"string\"}\n",
"# @markdown 选中模型epoch\n",
"MODELEPOCH = 9600 # @param {type:\"integer\"}\n",
"\n",
"import os\n",
"\n",
"# Stop before anything is removed if the selected checkpoint pair is missing;\n",
"# otherwise the backup copies fail and the rm below still deletes every .pth.\n",
"for _kind in (\"G\", \"D\"):\n",
"    _ckpt = f\"/content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/{_kind}_{MODELEPOCH}.pth\"\n",
"    if not os.path.isfile(_ckpt):\n",
"        raise FileNotFoundError(f\"{_ckpt} does not exist; nothing was deleted\")\n",
"\n",
"# Park the selected G/D pair outside the log directory while it is cleaned.\n",
"!echo \"备份选中的模型。。。\"\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
"\n",
"!echo \"正在删除。。。\"\n",
"!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
"!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n",
"\n",
"# Move the parked pair back under its original file names.\n",
"!echo \"恢复选中的模型。。。\"\n",
"!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
"!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
"\n",
"!echo \"删除完成\"\n",
"!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QhSiPTVPoIRh"
},
"outputs": [],
"source": [
"# @title 清除项目下所有文件,只留选中的模型(慎点,仔细看代码)\n",
"# @markdown 模型名\n",
"MODELNAME = \"lulu\" # @param {type:\"string\"}\n",
"# @markdown 选中模型epoch\n",
"MODELEPOCH = 9600 # @param {type:\"integer\"}\n",
"\n",
"import os\n",
"\n",
"# Stop before anything is removed if the selected checkpoint pair is missing;\n",
"# otherwise the backup copies fail and the rm -rf below still empties the\n",
"# whole log directory.\n",
"for _kind in (\"G\", \"D\"):\n",
"    _ckpt = f\"/content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/{_kind}_{MODELEPOCH}.pth\"\n",
"    if not os.path.isfile(_ckpt):\n",
"        raise FileNotFoundError(f\"{_ckpt} does not exist; nothing was deleted\")\n",
"\n",
"# Park the selected G/D pair outside the log directory while it is emptied.\n",
"!echo \"备份选中的模型。。。\"\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
"\n",
"!echo \"正在删除。。。\"\n",
"!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
"!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n",
"\n",
"# Move the parked pair back under its original file names.\n",
"!echo \"恢复选中的模型。。。\"\n",
"!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
"!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n",
"\n",
"!echo \"删除完成\"\n",
"!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"private_outputs": true,
"provenance": []
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}