diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..82c8a74 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.DS_Store +__pycache__ +/TEMP +*.pyd +hubert_base.pt +/logs diff --git a/README.md b/README.md index 98d576a..37cba22 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Retrieval-based-Voice-Conversion-WebUI +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) + 缺失的2个文件夹和2个文件: hubert_base.pt diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb new file mode 100644 index 0000000..125beb1 --- /dev/null +++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb @@ -0,0 +1,211 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GmFP6bN9dvOq" + }, + "outputs": [], + "source": [ + "#@title 查看显卡\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wjddIFr1oS3W" + }, + "outputs": [], + "source": [ + "#@title 安装依赖\n", + "!apt-get -y install build-essential python3-dev ffmpeg\n", + "!pip3 install --upgrade setuptools wheel\n", + "!pip3 install --upgrade pip\n", + "!pip3 install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ge_97mfpgqTm" + }, + "outputs": [], + "source": [ + "#@title 克隆仓库\n", + "\n", + "!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n", + "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", + "!mkdir -p pretrained uvr5_weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BLDEZADkvlw1" + }, + "outputs": [], + "source": [ + "#@title 更新仓库(一般无需执行)\n", + "!git pull" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UG3XpUwEomUz" + }, + "outputs": [], + "source": [ + "!apt -y install -qq aria2\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n", + "\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n", + "\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mwk7Q0Loqzjx" + }, + "outputs": [], + "source": [ + "#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n", + "\n", + "#@markdown 数据集位置\n", + "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327.zip\" #@param {type:\"string\"}\n", + "\n", + "from google.colab import drive\n", + "drive.mount('/content/drive')\n", + "!mkdir -p /content/dataset\n", + "!unzip -d /content/dataset {DATASET}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7vh6vphDwO0b" + }, + "outputs": [], + "source": [ + "#@title 启动web\n", + "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", + "!python3 infer-web.py --colab --pycmd python3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FgJuNeAwx5Y_" + }, + "outputs": [], + "source": [ + "#@title 手动将训练后的模型文件备份到谷歌云盘\n", + "#@markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", + "\n", + "#@markdown 模型名\n", + "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", + "#@markdown 模型epoch\n", + "MODELEPOCH = 3540 #@param {type:\"integer\"}\n", + "\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n", + "\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OVQoLQJXS7WX" + }, + "outputs": [], + "source": [ + "#@title 从谷歌云盘恢复pth\n", + "#@markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", + "\n", + "#@markdown 模型名\n", + "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", + "#@markdown 模型epoch\n", + "MODELEPOCH = 730 #@param {type:\"integer\"}\n", + "\n", + "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", + "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZKAyuKb9J6dz" + }, + "outputs": [], + "source": [ + "#@title 手动训练(不推荐)\n", + "#@markdown 模型名\n", + "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", + "\n", + "!python3 trainset_preprocess_pipeline_print.py /content/dataset 32000 8 logs/{MODELNAME} True\n", + "\n", + "!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n", + "\n", + "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr 32k -f0 0 -g 0 -bs 4 -te 10 -se 5 -pg pretrained/G32k.pth -pd pretrained/D32k.pth -l 0 -c 0\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "private_outputs": true, + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/config.py b/config.py index f379ea7..086f882 100644 --- a/config.py +++ b/config.py @@ -1,3 +1,10 @@ +import argparse +parser = argparse.ArgumentParser() +parser.add_argument("--port", type=int, default=7865, help="Listen port") +parser.add_argument("--pycmd", type=str, default="python", help="Python command") +parser.add_argument("--colab", action='store_true', help="Launch in colab") +parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing") +cmd_opts = parser.parse_args() ############离线VC参数 inp_root=r"白鹭霜华长条"#对输入目录下所有音频进行转换,别放非音频文件 opt_root=r"opt"#输出目录 @@ -7,10 +14,15 @@ person=r"weights\洛天依v3.pt"#目前只有洛天依v3 device = "cuda:0"#填写cuda:x或cpu,x指代第几张卡,只支持N卡加速 is_half=True#9-10-20-30-40系显卡无脑True,不影响质量,>=20显卡开启有加速 n_cpu=0#默认0用上所有线程,写数字限制CPU资源使用 +############python命令路径 +python_cmd=cmd_opts.pycmd +listen_port=cmd_opts.port +iscolab=cmd_opts.colab +noparallel=cmd_opts.noparallel ############下头别动 import torch if(torch.cuda.is_available()==False): - print("没有发现支持的N卡,使用CPU进行推理") + print("没有发现支持的N卡, 使用CPU进行推理") device="cpu" is_half=False if(device!="cpu"): diff --git a/extract_feature_print.py b/extract_feature_print.py index 7a0ff4d..2e5eeb3 100644 --- a/extract_feature_print.py +++ b/extract_feature_print.py @@ -1,17 +1,20 @@ import os,sys,traceback -n_part=int(sys.argv[1]) -i_part=int(sys.argv[2]) -i_gpu=sys.argv[3] -exp_dir=sys.argv[4] -os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) +if len(sys.argv) == 4: + n_part=int(sys.argv[1]) + i_part=int(sys.argv[2]) + exp_dir=sys.argv[3] +else: + n_part=int(sys.argv[1]) + i_part=int(sys.argv[2]) + i_gpu=sys.argv[3] + exp_dir=sys.argv[4] + os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) import torch import torch.nn.functional as F import soundfile as sf import numpy as np -import joblib from fairseq import checkpoint_utils -import pdb device = torch.device("cuda" if torch.cuda.is_available() else "cpu") f = open("%s/extract_f0_feature.log"%exp_dir, "a+") @@ -48,7 +51,8 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( ) model = models[0] model = model.to(device) -model = model.half() +if torch.cuda.is_available(): + model = model.half() model.eval() todo=sorted(list(os.listdir(wavPath)))[i_part::n_part] @@ -67,7 +71,7 @@ else: feats = readwave(wav_path, normalize=saved_cfg.task.normalize) padding_mask = torch.BoolTensor(feats.shape).fill_(False) inputs = { - "source": feats.half().to(device), + "source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device), "padding_mask": padding_mask.to(device), "output_layer": 9, # layer 9 } diff --git a/infer-web.py b/infer-web.py index cf0f242..403ac7c 100644 --- a/infer-web.py +++ b/infer-web.py @@ -1,9 +1,10 @@ from multiprocessing import cpu_count import threading from time import sleep -from subprocess import Popen,PIPE,run as runn +from subprocess import Popen from time import sleep -import torch, pdb, os,traceback,sys,warnings,shutil,numpy as np,faiss +import torch, os,traceback,sys,warnings,shutil,numpy as np +import faiss #判断是否有能用来训练和加速推理的N卡 ncpu=cpu_count() ngpu=torch.cuda.device_count() @@ -33,11 +34,9 @@ from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFs from scipy.io import wavfile from fairseq import checkpoint_utils import gradio as gr -import librosa import logging from vc_infer_pipeline import VC -import soundfile as sf -from config import is_half,device,is_half +from config import is_half,device,is_half,python_cmd,listen_port,iscolab,noparallel from infer_uvr5 import _audio_pre_ from my_utils import load_audio from train.process_ckpt import show_info,change_info,merge,extract_small_model @@ -64,9 +63,11 @@ def load_hubert(): weight_root="weights" weight_uvr5_root="uvr5_weights" names=[] -for name in os.listdir(weight_root):names.append(name) +for name in os.listdir(weight_root): + if name.endswith(".pth"): names.append(name) uvr5_names=[] -for name in os.listdir(weight_uvr5_root):uvr5_names.append(name.replace(".pth","")) +for name in os.listdir(weight_uvr5_root): + if name.endswith(".pth"): uvr5_names.append(name.replace(".pth","")) def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_npy,index_rate):#spk_item, input_audio0, vc_transform0,f0_file,f0method0 global tgt_sr,net_g,vc,hubert_model @@ -180,7 +181,11 @@ def get_vc(sid): n_spk=cpt["config"][-3] return {"visible": True,"maximum": n_spk, "__type__": "update"} -def change_choices():return {"choices": sorted(list(os.listdir(weight_root))), "__type__": "update"} +def change_choices(): + names=[] + for name in os.listdir(weight_root): + if name.endswith(".pth"): names.append(name) + return {"choices": sorted(names), "__type__": "update"} def clean():return {"value": "", "__type__": "update"} def change_f0(if_f0_3,sr2):#np7, f0method8,pretrained_G14,pretrained_D15 if(if_f0_3=="是"):return {"visible": True, "__type__": "update"},{"visible": True, "__type__": "update"},"pretrained/f0G%s.pth"%sr2,"pretrained/f0D%s.pth"%sr2 @@ -217,7 +222,7 @@ def preprocess_dataset(trainset_dir,exp_dir,sr,n_p=ncpu): os.makedirs("%s/logs/%s"%(now_dir,exp_dir),exist_ok=True) f = open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir), "w") f.close() - cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir,sr,n_p,now_dir,exp_dir) + cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir,sr,n_p,now_dir,exp_dir)+str(noparallel) print(cmd) p = Popen(cmd, shell=True)#, stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir ###煞笔gr,popen read都非得全跑完了再一次性读取,不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 @@ -237,7 +242,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir): f = open("%s/logs/%s/extract_f0_feature.log"%(now_dir,exp_dir), "w") f.close() if(if_f0=="是"): - cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method) + cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method) print(cmd) p = Popen(cmd, shell=True,cwd=now_dir)#, stdin=PIPE, stdout=PIPE,stderr=PIPE ###煞笔gr,popen read都非得全跑完了再一次性读取,不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 @@ -261,7 +266,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir): leng=len(gpus) ps=[] for idx,n_g in enumerate(gpus): - cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir) + cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir) print(cmd) p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir ps.append(p) @@ -300,8 +305,12 @@ def click_train(exp_dir1,sr2,if_f0_3,spk_id5,save_epoch10,total_epoch11,batch_si with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt)) print("write filelist done") #生成config#无需生成config - # cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0" - cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0) + # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0" + print("use gpus:",gpus16) + if gpus16: + cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0) + else: + cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0) print(cmd) p = Popen(cmd, shell=True, cwd=now_dir) p.wait() @@ -346,7 +355,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth os.makedirs("%s/logs/%s"%(now_dir,exp_dir1),exist_ok=True) #########step1:处理数据 open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir1), "w").close() - cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1) + cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1)+str(noparallel) yield get_info_str("step1:正在处理数据") yield get_info_str(cmd) p = Popen(cmd, shell=True) @@ -356,7 +365,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir1), "w") if(if_f0_3=="是"): yield get_info_str("step2a:正在提取音高") - cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8) + cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8) yield get_info_str(cmd) p = Popen(cmd, shell=True,cwd=now_dir) p.wait() @@ -368,7 +377,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth leng=len(gpus) ps=[] for idx,n_g in enumerate(gpus): - cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1) + cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1) yield get_info_str(cmd) p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir ps.append(p) @@ -394,7 +403,10 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth opt.append("%s/%s.wav|%s/%s.npy|%s"%(gt_wavs_dir.replace("\\","\\\\"),name,co256_dir.replace("\\","\\\\"),name,spk_id5)) with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt)) yield get_info_str("write filelist done") - cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0) + if gpus16: + cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0) + else: + cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0) yield get_info_str(cmd) p = Popen(cmd, shell=True, cwd=now_dir) p.wait() @@ -443,7 +455,7 @@ with gr.Blocks() as app: with gr.Tabs(): with gr.TabItem("模型推理"): with gr.Row(): - sid0 = gr.Dropdown(label="推理音色", choices=names) + sid0 = gr.Dropdown(label="推理音色", choices=sorted(names)) refresh_button = gr.Button("刷新音色列表", variant="primary") refresh_button.click( fn=change_choices, @@ -625,6 +637,7 @@ with gr.Blocks() as app: with gr.TabItem("点击查看交流、问题反馈群号"): gr.Markdown(value="""xxxxx""") - # app.launch(server_name="0.0.0.0",server_port=7860) - # app.queue(concurrency_count=511, max_size=1022).launch(server_name="127.0.0.1",inbrowser=True,server_port=7861,quiet=True) - app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=7865,quiet=True) \ No newline at end of file + if iscolab: + app.queue(concurrency_count=511, max_size=1022).launch(share=True) + else: + app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=listen_port,quiet=True) diff --git a/infer_pack/__pycache__/attentions.cpython-39.pyc b/infer_pack/__pycache__/attentions.cpython-39.pyc deleted file mode 100644 index 6073f25..0000000 Binary files a/infer_pack/__pycache__/attentions.cpython-39.pyc and /dev/null differ diff --git a/infer_pack/__pycache__/commons.cpython-39.pyc b/infer_pack/__pycache__/commons.cpython-39.pyc deleted file mode 100644 index de9f06c..0000000 Binary files a/infer_pack/__pycache__/commons.cpython-39.pyc and /dev/null differ diff --git a/infer_pack/__pycache__/models.cpython-39.pyc b/infer_pack/__pycache__/models.cpython-39.pyc deleted file mode 100644 index 068c8c5..0000000 Binary files a/infer_pack/__pycache__/models.cpython-39.pyc and /dev/null differ diff --git a/infer_pack/__pycache__/modules.cpython-39.pyc b/infer_pack/__pycache__/modules.cpython-39.pyc deleted file mode 100644 index b5d62e1..0000000 Binary files a/infer_pack/__pycache__/modules.cpython-39.pyc and /dev/null differ diff --git a/infer_pack/__pycache__/transforms.cpython-39.pyc b/infer_pack/__pycache__/transforms.cpython-39.pyc deleted file mode 100644 index 2676b31..0000000 Binary files a/infer_pack/__pycache__/transforms.cpython-39.pyc and /dev/null differ diff --git a/my_utils.py b/my_utils.py index 48a93b6..c9c3343 100644 --- a/my_utils.py +++ b/my_utils.py @@ -1,4 +1,5 @@ -import ffmpeg,numpy as np +import ffmpeg +import numpy as np def load_audio(file,sr): try: # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26 @@ -7,12 +8,9 @@ def load_audio(file,sr): out, _ = ( ffmpeg.input(file, threads=0) .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr) - .run(cmd=["./ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) + .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) ) - except ffmpeg.Error as e: - raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e + except Exception as e: + raise RuntimeError(f"Failed to load audio: {e}") return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0 - -if __name__=='__main__' : - print(load_audio(r"C:\CloudMusic\宮野幸子,森下唯 - 月夜に謳う君 -LUNA-.mp3",16000).shape) \ No newline at end of file diff --git a/pretrained/.gitignore b/pretrained/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/pretrained/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/slicer2.py b/slicer2.py index 84ea78c..a09f0de 100644 --- a/slicer2.py +++ b/slicer2.py @@ -4,7 +4,6 @@ import numpy as np # This function is obtained from librosa. def get_rms( y, - *, frame_length=2048, hop_length=512, pad_mode="constant", diff --git a/train/__pycache__/data_utils.cpython-39.pyc b/train/__pycache__/data_utils.cpython-39.pyc deleted file mode 100644 index c3cbe4c..0000000 Binary files a/train/__pycache__/data_utils.cpython-39.pyc and /dev/null differ diff --git a/train/__pycache__/losses.cpython-39.pyc b/train/__pycache__/losses.cpython-39.pyc deleted file mode 100644 index bb61e47..0000000 Binary files a/train/__pycache__/losses.cpython-39.pyc and /dev/null differ diff --git a/train/__pycache__/mel_processing.cpython-39.pyc b/train/__pycache__/mel_processing.cpython-39.pyc deleted file mode 100644 index d2310c5..0000000 Binary files a/train/__pycache__/mel_processing.cpython-39.pyc and /dev/null differ diff --git a/train/__pycache__/process_ckpt.cpython-39.pyc b/train/__pycache__/process_ckpt.cpython-39.pyc deleted file mode 100644 index b70104a..0000000 Binary files a/train/__pycache__/process_ckpt.cpython-39.pyc and /dev/null differ diff --git a/train/__pycache__/utils.cpython-39.pyc b/train/__pycache__/utils.cpython-39.pyc deleted file mode 100644 index 7a6f49c..0000000 Binary files a/train/__pycache__/utils.cpython-39.pyc and /dev/null differ diff --git a/train_nsf_sim_cache_sid_load_pretrain.py b/train_nsf_sim_cache_sid_load_pretrain.py index 3d84d8c..3fb341b 100644 --- a/train_nsf_sim_cache_sid_load_pretrain.py +++ b/train_nsf_sim_cache_sid_load_pretrain.py @@ -34,9 +34,6 @@ global_step = 0 def main(): - """Assume Single Node Multi GPUs Training Only""" - assert torch.cuda.is_available(), "CPU training is not allowed." - # n_gpus = torch.cuda.device_count() os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "5555" @@ -65,7 +62,7 @@ def run(rank, n_gpus, hps): backend="gloo", init_method="env://", world_size=n_gpus, rank=rank ) torch.manual_seed(hps.train.seed) - torch.cuda.set_device(rank) + if torch.cuda.is_available(): torch.cuda.set_device(rank) if (hps.if_f0 == 1):train_dataset = TextAudioLoaderMultiNSFsid(hps.data.training_files, hps.data) else:train_dataset = TextAudioLoader(hps.data.training_files, hps.data) @@ -92,9 +89,13 @@ def run(rank, n_gpus, hps): persistent_workers=True, prefetch_factor=8, ) - if(hps.if_f0==1):net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate).cuda(rank) - else:net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run).cuda(rank) - net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) + if(hps.if_f0==1): + net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate) + else: + net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run) + if torch.cuda.is_available(): net_g = net_g.cuda(rank) + net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm) + if torch.cuda.is_available(): net_d = net_d.cuda(rank) optim_g = torch.optim.AdamW( net_g.parameters(), hps.train.learning_rate, @@ -109,8 +110,12 @@ def run(rank, n_gpus, hps): ) # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True) # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True) - net_g = DDP(net_g, device_ids=[rank]) - net_d = DDP(net_d, device_ids=[rank]) + if torch.cuda.is_available(): + net_g = DDP(net_g, device_ids=[rank]) + net_d = DDP(net_d, device_ids=[rank]) + else: + net_g = DDP(net_g) + net_d = DDP(net_d) try:#如果能加载自动resume _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d) # D多半加载没事 @@ -190,11 +195,12 @@ def train_and_evaluate( for batch_idx, info in enumerate(train_loader): if (hps.if_f0 == 1):phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths,sid=info else:phone,phone_lengths,spec,spec_lengths,wave,wave_lengths,sid=info - phone, phone_lengths = phone.cuda(rank, non_blocking=True),phone_lengths.cuda(rank, non_blocking=True ) - if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True) - sid = sid.cuda(rank, non_blocking=True) - spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True) - wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True) + if torch.cuda.is_available(): + phone, phone_lengths = phone.cuda(rank, non_blocking=True), phone_lengths.cuda(rank, non_blocking=True ) + if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True) + sid = sid.cuda(rank, non_blocking=True) + spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True) + wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True) if(hps.if_cache_data_in_gpu==True): if (hps.if_f0 == 1):cache.append((batch_idx, (phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths ,sid))) else:cache.append((batch_idx, (phone,phone_lengths,spec,spec_lengths,wave,wave_lengths ,sid))) diff --git a/trainset_preprocess_pipeline_print.py b/trainset_preprocess_pipeline_print.py index e5c9d45..a5af367 100644 --- a/trainset_preprocess_pipeline_print.py +++ b/trainset_preprocess_pipeline_print.py @@ -1,4 +1,4 @@ -import sys,os,pdb,multiprocessing +import sys,os,multiprocessing now_dir=os.getcwd() sys.path.append(now_dir) @@ -6,20 +6,15 @@ inp_root = sys.argv[1] sr = int(sys.argv[2]) n_p = int(sys.argv[3]) exp_dir = sys.argv[4] -import numpy as np,ffmpeg,os,traceback +noparallel = sys.argv[5] == "True" +import numpy as np,os,traceback from slicer2 import Slicer -from joblib import Parallel, delayed import librosa,traceback from scipy.io import wavfile import multiprocessing from my_utils import load_audio -from time import sleep -f = open("%s/preprocess.log"%exp_dir, "a+") -def printt(strr): - print(strr) - f.write("%s\n" % strr) - f.flush() +mutex = multiprocessing.Lock() class PreProcess(): def __init__(self,sr,exp_dir): @@ -40,10 +35,18 @@ class PreProcess(): self.exp_dir=exp_dir self.gt_wavs_dir="%s/0_gt_wavs"%exp_dir self.wavs16k_dir="%s/1_16k_wavs"%exp_dir + self.f = open("%s/preprocess.log"%exp_dir, "a+") os.makedirs(self.exp_dir,exist_ok=True) os.makedirs(self.gt_wavs_dir,exist_ok=True) os.makedirs(self.wavs16k_dir,exist_ok=True) + def print(self, strr): + mutex.acquire() + print(strr) + self.f.write("%s\n" % strr) + self.f.flush() + mutex.release() + def norm_write(self,tmp_audio,idx0,idx1): tmp_audio = (tmp_audio / np.abs(tmp_audio).max() * (self.max * self.alpha)) + (1 - self.alpha) * tmp_audio wavfile.write("%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1), self.sr, (tmp_audio*32768).astype(np.int16)) @@ -67,9 +70,9 @@ class PreProcess(): tmp_audio = audio[start:] break self.norm_write(tmp_audio, idx0, idx1) - printt("%s->Suc."%path) + self.print("%s->Suc."%path) except: - printt("%s->%s"%(path,traceback.format_exc())) + self.print("%s->%s"%(path,traceback.format_exc())) def pipeline_mp(self,infos): for path, idx0 in infos: @@ -78,27 +81,24 @@ class PreProcess(): def pipeline_mp_inp_dir(self,inp_root,n_p): try: infos = [("%s/%s" % (inp_root, name), idx) for idx, name in enumerate(sorted(list(os.listdir(inp_root))))] - ps=[] - for i in range(n_p): - p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],)) - p.start() - ps.append(p) - for p in ps:p.join() + if noparallel: + for i in range(n_p): self.pipeline_mp(infos[i::n_p]) + else: + ps=[] + for i in range(n_p): + p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],)) + p.start() + ps.append(p) + for p in ps:p.join() except: - printt("Fail. %s"%traceback.format_exc()) + self.print("Fail. %s"%traceback.format_exc()) + +def preprocess_trainset(inp_root, sr, n_p, exp_dir): + pp=PreProcess(sr,exp_dir) + pp.print("start preprocess") + pp.print(sys.argv) + pp.pipeline_mp_inp_dir(inp_root,n_p) + pp.print("end preprocess") if __name__=='__main__': - # f = open("logs/log_preprocess.log", "w") - printt(sys.argv) - ###################################################### - # inp_root=r"E:\语音音频+标注\米津玄师\src" - # inp_root=r"E:\codes\py39\vits_vc_gpu_train\todo-songs" - # sr=40000 - # n_p = 6 - # exp_dir=r"E:\codes\py39\dataset\mi-test" - - ###################################################### - printt("start preprocess") - pp=PreProcess(sr,exp_dir) - pp.pipeline_mp_inp_dir(inp_root,n_p) - printt("end preprocess") + preprocess_trainset(inp_root, sr, n_p, exp_dir) diff --git a/uvr5_pack/__pycache__/utils.cpython-39.pyc b/uvr5_pack/__pycache__/utils.cpython-39.pyc deleted file mode 100644 index eff88c5..0000000 Binary files a/uvr5_pack/__pycache__/utils.cpython-39.pyc and /dev/null differ diff --git a/uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc b/uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc deleted file mode 100644 index a8ec492..0000000 Binary files a/uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc and /dev/null differ diff --git a/uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc b/uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc deleted file mode 100644 index cb0c984..0000000 Binary files a/uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc and /dev/null differ diff --git a/uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc b/uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc deleted file mode 100644 index b13e91d..0000000 Binary files a/uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc and /dev/null differ diff --git a/uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc b/uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc deleted file mode 100644 index 2a1ca01..0000000 Binary files a/uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc and /dev/null differ diff --git a/uvr5_weights/.gitignore b/uvr5_weights/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/uvr5_weights/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/weights/... b/weights/... deleted file mode 100644 index 8b13789..0000000 --- a/weights/... +++ /dev/null @@ -1 +0,0 @@ - diff --git a/weights/.gitignore b/weights/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/weights/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/使用需遵守的协议-LICENSE.txt b/使用需遵守的协议-LICENSE.txt index 37abffc..db2094b 100644 --- a/使用需遵守的协议-LICENSE.txt +++ b/使用需遵守的协议-LICENSE.txt @@ -1,6 +1,7 @@ MIT License Copyright (c) 2023 liujing04 +Copyright (c) 2023 源文雨 本软件及其相关代码以MIT协议开源,作者不对软件具备任何控制力,使用软件者、传播软件导出的声音者自负全责。 如不认可该条款,则不能使用或引用软件包内任何代码和文件。