mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-01-01 12:35:04 +08:00
commit
4b039b098b
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
.DS_Store
|
||||
__pycache__
|
||||
/TEMP
|
||||
*.pyd
|
||||
hubert_base.pt
|
||||
/logs
|
@ -1,5 +1,7 @@
|
||||
# Retrieval-based-Voice-Conversion-WebUI
|
||||
|
||||
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
|
||||
|
||||
缺失的2个文件夹和2个文件:
|
||||
|
||||
hubert_base.pt
|
||||
|
211
Retrieval_based_Voice_Conversion_WebUI.ipynb
Normal file
211
Retrieval_based_Voice_Conversion_WebUI.ipynb
Normal file
@ -0,0 +1,211 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "GmFP6bN9dvOq"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 查看显卡\n",
|
||||
"!nvidia-smi"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "wjddIFr1oS3W"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 安装依赖\n",
|
||||
"!apt-get -y install build-essential python3-dev ffmpeg\n",
|
||||
"!pip3 install --upgrade setuptools wheel\n",
|
||||
"!pip3 install --upgrade pip\n",
|
||||
"!pip3 install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "ge_97mfpgqTm"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 克隆仓库\n",
|
||||
"\n",
|
||||
"!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
|
||||
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
|
||||
"!mkdir -p pretrained uvr5_weights"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "BLDEZADkvlw1"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 更新仓库(一般无需执行)\n",
|
||||
"!git pull"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "UG3XpUwEomUz"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!apt -y install -qq aria2\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
|
||||
"\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
|
||||
"\n",
|
||||
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Mwk7Q0Loqzjx"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
|
||||
"\n",
|
||||
"#@markdown 数据集位置\n",
|
||||
"DATASET = \"/content/drive/MyDrive/dataset/lulu20230327.zip\" #@param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"from google.colab import drive\n",
|
||||
"drive.mount('/content/drive')\n",
|
||||
"!mkdir -p /content/dataset\n",
|
||||
"!unzip -d /content/dataset {DATASET}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "7vh6vphDwO0b"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 启动web\n",
|
||||
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
|
||||
"!python3 infer-web.py --colab --pycmd python3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "FgJuNeAwx5Y_"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 手动将训练后的模型文件备份到谷歌云盘\n",
|
||||
"#@markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n",
|
||||
"\n",
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"#@markdown 模型epoch\n",
|
||||
"MODELEPOCH = 3540 #@param {type:\"integer\"}\n",
|
||||
"\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
|
||||
"\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "OVQoLQJXS7WX"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 从谷歌云盘恢复pth\n",
|
||||
"#@markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n",
|
||||
"\n",
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"#@markdown 模型epoch\n",
|
||||
"MODELEPOCH = 730 #@param {type:\"integer\"}\n",
|
||||
"\n",
|
||||
"!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
|
||||
"!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "ZKAyuKb9J6dz"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#@title 手动训练(不推荐)\n",
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"!python3 trainset_preprocess_pipeline_print.py /content/dataset 32000 8 logs/{MODELNAME} True\n",
|
||||
"\n",
|
||||
"!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n",
|
||||
"\n",
|
||||
"!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr 32k -f0 0 -g 0 -bs 4 -te 10 -se 5 -pg pretrained/G32k.pth -pd pretrained/D32k.pth -l 0 -c 0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"private_outputs": true,
|
||||
"provenance": []
|
||||
},
|
||||
"gpuClass": "standard",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
14
config.py
14
config.py
@ -1,3 +1,10 @@
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--port", type=int, default=7865, help="Listen port")
|
||||
parser.add_argument("--pycmd", type=str, default="python", help="Python command")
|
||||
parser.add_argument("--colab", action='store_true', help="Launch in colab")
|
||||
parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing")
|
||||
cmd_opts = parser.parse_args()
|
||||
############离线VC参数
|
||||
inp_root=r"白鹭霜华长条"#对输入目录下所有音频进行转换,别放非音频文件
|
||||
opt_root=r"opt"#输出目录
|
||||
@ -7,10 +14,15 @@ person=r"weights\洛天依v3.pt"#目前只有洛天依v3
|
||||
device = "cuda:0"#填写cuda:x或cpu,x指代第几张卡,只支持N卡加速
|
||||
is_half=True#9-10-20-30-40系显卡无脑True,不影响质量,>=20显卡开启有加速
|
||||
n_cpu=0#默认0用上所有线程,写数字限制CPU资源使用
|
||||
############python命令路径
|
||||
python_cmd=cmd_opts.pycmd
|
||||
listen_port=cmd_opts.port
|
||||
iscolab=cmd_opts.colab
|
||||
noparallel=cmd_opts.noparallel
|
||||
############下头别动
|
||||
import torch
|
||||
if(torch.cuda.is_available()==False):
|
||||
print("没有发现支持的N卡,使用CPU进行推理")
|
||||
print("没有发现支持的N卡, 使用CPU进行推理")
|
||||
device="cpu"
|
||||
is_half=False
|
||||
if(device!="cpu"):
|
||||
|
@ -1,17 +1,20 @@
|
||||
import os,sys,traceback
|
||||
n_part=int(sys.argv[1])
|
||||
i_part=int(sys.argv[2])
|
||||
i_gpu=sys.argv[3]
|
||||
exp_dir=sys.argv[4]
|
||||
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
|
||||
if len(sys.argv) == 4:
|
||||
n_part=int(sys.argv[1])
|
||||
i_part=int(sys.argv[2])
|
||||
exp_dir=sys.argv[3]
|
||||
else:
|
||||
n_part=int(sys.argv[1])
|
||||
i_part=int(sys.argv[2])
|
||||
i_gpu=sys.argv[3]
|
||||
exp_dir=sys.argv[4]
|
||||
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import soundfile as sf
|
||||
import numpy as np
|
||||
import joblib
|
||||
from fairseq import checkpoint_utils
|
||||
import pdb
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
f = open("%s/extract_f0_feature.log"%exp_dir, "a+")
|
||||
@ -48,7 +51,8 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
|
||||
)
|
||||
model = models[0]
|
||||
model = model.to(device)
|
||||
model = model.half()
|
||||
if torch.cuda.is_available():
|
||||
model = model.half()
|
||||
model.eval()
|
||||
|
||||
todo=sorted(list(os.listdir(wavPath)))[i_part::n_part]
|
||||
@ -67,7 +71,7 @@ else:
|
||||
feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
|
||||
padding_mask = torch.BoolTensor(feats.shape).fill_(False)
|
||||
inputs = {
|
||||
"source": feats.half().to(device),
|
||||
"source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device),
|
||||
"padding_mask": padding_mask.to(device),
|
||||
"output_layer": 9, # layer 9
|
||||
}
|
||||
|
55
infer-web.py
55
infer-web.py
@ -1,9 +1,10 @@
|
||||
from multiprocessing import cpu_count
|
||||
import threading
|
||||
from time import sleep
|
||||
from subprocess import Popen,PIPE,run as runn
|
||||
from subprocess import Popen
|
||||
from time import sleep
|
||||
import torch, pdb, os,traceback,sys,warnings,shutil,numpy as np,faiss
|
||||
import torch, os,traceback,sys,warnings,shutil,numpy as np
|
||||
import faiss
|
||||
#判断是否有能用来训练和加速推理的N卡
|
||||
ncpu=cpu_count()
|
||||
ngpu=torch.cuda.device_count()
|
||||
@ -33,11 +34,9 @@ from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFs
|
||||
from scipy.io import wavfile
|
||||
from fairseq import checkpoint_utils
|
||||
import gradio as gr
|
||||
import librosa
|
||||
import logging
|
||||
from vc_infer_pipeline import VC
|
||||
import soundfile as sf
|
||||
from config import is_half,device,is_half
|
||||
from config import is_half,device,is_half,python_cmd,listen_port,iscolab,noparallel
|
||||
from infer_uvr5 import _audio_pre_
|
||||
from my_utils import load_audio
|
||||
from train.process_ckpt import show_info,change_info,merge,extract_small_model
|
||||
@ -64,9 +63,11 @@ def load_hubert():
|
||||
weight_root="weights"
|
||||
weight_uvr5_root="uvr5_weights"
|
||||
names=[]
|
||||
for name in os.listdir(weight_root):names.append(name)
|
||||
for name in os.listdir(weight_root):
|
||||
if name.endswith(".pth"): names.append(name)
|
||||
uvr5_names=[]
|
||||
for name in os.listdir(weight_uvr5_root):uvr5_names.append(name.replace(".pth",""))
|
||||
for name in os.listdir(weight_uvr5_root):
|
||||
if name.endswith(".pth"): uvr5_names.append(name.replace(".pth",""))
|
||||
|
||||
def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_npy,index_rate):#spk_item, input_audio0, vc_transform0,f0_file,f0method0
|
||||
global tgt_sr,net_g,vc,hubert_model
|
||||
@ -180,7 +181,11 @@ def get_vc(sid):
|
||||
n_spk=cpt["config"][-3]
|
||||
return {"visible": True,"maximum": n_spk, "__type__": "update"}
|
||||
|
||||
def change_choices():return {"choices": sorted(list(os.listdir(weight_root))), "__type__": "update"}
|
||||
def change_choices():
|
||||
names=[]
|
||||
for name in os.listdir(weight_root):
|
||||
if name.endswith(".pth"): names.append(name)
|
||||
return {"choices": sorted(names), "__type__": "update"}
|
||||
def clean():return {"value": "", "__type__": "update"}
|
||||
def change_f0(if_f0_3,sr2):#np7, f0method8,pretrained_G14,pretrained_D15
|
||||
if(if_f0_3=="是"):return {"visible": True, "__type__": "update"},{"visible": True, "__type__": "update"},"pretrained/f0G%s.pth"%sr2,"pretrained/f0D%s.pth"%sr2
|
||||
@ -217,7 +222,7 @@ def preprocess_dataset(trainset_dir,exp_dir,sr,n_p=ncpu):
|
||||
os.makedirs("%s/logs/%s"%(now_dir,exp_dir),exist_ok=True)
|
||||
f = open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir), "w")
|
||||
f.close()
|
||||
cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir,sr,n_p,now_dir,exp_dir)
|
||||
cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir,sr,n_p,now_dir,exp_dir)+str(noparallel)
|
||||
print(cmd)
|
||||
p = Popen(cmd, shell=True)#, stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
|
||||
###煞笔gr,popen read都非得全跑完了再一次性读取,不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
|
||||
@ -237,7 +242,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir):
|
||||
f = open("%s/logs/%s/extract_f0_feature.log"%(now_dir,exp_dir), "w")
|
||||
f.close()
|
||||
if(if_f0=="是"):
|
||||
cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method)
|
||||
cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method)
|
||||
print(cmd)
|
||||
p = Popen(cmd, shell=True,cwd=now_dir)#, stdin=PIPE, stdout=PIPE,stderr=PIPE
|
||||
###煞笔gr,popen read都非得全跑完了再一次性读取,不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
|
||||
@ -261,7 +266,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir):
|
||||
leng=len(gpus)
|
||||
ps=[]
|
||||
for idx,n_g in enumerate(gpus):
|
||||
cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
|
||||
cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
|
||||
print(cmd)
|
||||
p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
|
||||
ps.append(p)
|
||||
@ -300,8 +305,12 @@ def click_train(exp_dir1,sr2,if_f0_3,spk_id5,save_epoch10,total_epoch11,batch_si
|
||||
with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt))
|
||||
print("write filelist done")
|
||||
#生成config#无需生成config
|
||||
# cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
|
||||
cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
|
||||
# cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
|
||||
print("use gpus:",gpus16)
|
||||
if gpus16:
|
||||
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
|
||||
else:
|
||||
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
|
||||
print(cmd)
|
||||
p = Popen(cmd, shell=True, cwd=now_dir)
|
||||
p.wait()
|
||||
@ -346,7 +355,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
|
||||
os.makedirs("%s/logs/%s"%(now_dir,exp_dir1),exist_ok=True)
|
||||
#########step1:处理数据
|
||||
open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir1), "w").close()
|
||||
cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1)
|
||||
cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1)+str(noparallel)
|
||||
yield get_info_str("step1:正在处理数据")
|
||||
yield get_info_str(cmd)
|
||||
p = Popen(cmd, shell=True)
|
||||
@ -356,7 +365,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
|
||||
open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir1), "w")
|
||||
if(if_f0_3=="是"):
|
||||
yield get_info_str("step2a:正在提取音高")
|
||||
cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8)
|
||||
cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8)
|
||||
yield get_info_str(cmd)
|
||||
p = Popen(cmd, shell=True,cwd=now_dir)
|
||||
p.wait()
|
||||
@ -368,7 +377,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
|
||||
leng=len(gpus)
|
||||
ps=[]
|
||||
for idx,n_g in enumerate(gpus):
|
||||
cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
|
||||
cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
|
||||
yield get_info_str(cmd)
|
||||
p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
|
||||
ps.append(p)
|
||||
@ -394,7 +403,10 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
|
||||
opt.append("%s/%s.wav|%s/%s.npy|%s"%(gt_wavs_dir.replace("\\","\\\\"),name,co256_dir.replace("\\","\\\\"),name,spk_id5))
|
||||
with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt))
|
||||
yield get_info_str("write filelist done")
|
||||
cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
|
||||
if gpus16:
|
||||
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
|
||||
else:
|
||||
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
|
||||
yield get_info_str(cmd)
|
||||
p = Popen(cmd, shell=True, cwd=now_dir)
|
||||
p.wait()
|
||||
@ -443,7 +455,7 @@ with gr.Blocks() as app:
|
||||
with gr.Tabs():
|
||||
with gr.TabItem("模型推理"):
|
||||
with gr.Row():
|
||||
sid0 = gr.Dropdown(label="推理音色", choices=names)
|
||||
sid0 = gr.Dropdown(label="推理音色", choices=sorted(names))
|
||||
refresh_button = gr.Button("刷新音色列表", variant="primary")
|
||||
refresh_button.click(
|
||||
fn=change_choices,
|
||||
@ -625,6 +637,7 @@ with gr.Blocks() as app:
|
||||
with gr.TabItem("点击查看交流、问题反馈群号"):
|
||||
gr.Markdown(value="""xxxxx""")
|
||||
|
||||
# app.launch(server_name="0.0.0.0",server_port=7860)
|
||||
# app.queue(concurrency_count=511, max_size=1022).launch(server_name="127.0.0.1",inbrowser=True,server_port=7861,quiet=True)
|
||||
app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=7865,quiet=True)
|
||||
if iscolab:
|
||||
app.queue(concurrency_count=511, max_size=1022).launch(share=True)
|
||||
else:
|
||||
app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=listen_port,quiet=True)
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
12
my_utils.py
12
my_utils.py
@ -1,4 +1,5 @@
|
||||
import ffmpeg,numpy as np
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
def load_audio(file,sr):
|
||||
try:
|
||||
# https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
|
||||
@ -7,12 +8,9 @@ def load_audio(file,sr):
|
||||
out, _ = (
|
||||
ffmpeg.input(file, threads=0)
|
||||
.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
|
||||
.run(cmd=["./ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||
)
|
||||
except ffmpeg.Error as e:
|
||||
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to load audio: {e}")
|
||||
|
||||
return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
|
||||
|
||||
if __name__=='__main__' :
|
||||
print(load_audio(r"C:\CloudMusic\宮野幸子,森下唯 - 月夜に謳う君 -LUNA-.mp3",16000).shape)
|
2
pretrained/.gitignore
vendored
Normal file
2
pretrained/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
@ -4,7 +4,6 @@ import numpy as np
|
||||
# This function is obtained from librosa.
|
||||
def get_rms(
|
||||
y,
|
||||
*,
|
||||
frame_length=2048,
|
||||
hop_length=512,
|
||||
pad_mode="constant",
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -34,9 +34,6 @@ global_step = 0
|
||||
|
||||
|
||||
def main():
|
||||
"""Assume Single Node Multi GPUs Training Only"""
|
||||
assert torch.cuda.is_available(), "CPU training is not allowed."
|
||||
|
||||
# n_gpus = torch.cuda.device_count()
|
||||
os.environ["MASTER_ADDR"] = "localhost"
|
||||
os.environ["MASTER_PORT"] = "5555"
|
||||
@ -65,7 +62,7 @@ def run(rank, n_gpus, hps):
|
||||
backend="gloo", init_method="env://", world_size=n_gpus, rank=rank
|
||||
)
|
||||
torch.manual_seed(hps.train.seed)
|
||||
torch.cuda.set_device(rank)
|
||||
if torch.cuda.is_available(): torch.cuda.set_device(rank)
|
||||
|
||||
if (hps.if_f0 == 1):train_dataset = TextAudioLoaderMultiNSFsid(hps.data.training_files, hps.data)
|
||||
else:train_dataset = TextAudioLoader(hps.data.training_files, hps.data)
|
||||
@ -92,9 +89,13 @@ def run(rank, n_gpus, hps):
|
||||
persistent_workers=True,
|
||||
prefetch_factor=8,
|
||||
)
|
||||
if(hps.if_f0==1):net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate).cuda(rank)
|
||||
else:net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run).cuda(rank)
|
||||
net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
|
||||
if(hps.if_f0==1):
|
||||
net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate)
|
||||
else:
|
||||
net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run)
|
||||
if torch.cuda.is_available(): net_g = net_g.cuda(rank)
|
||||
net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
|
||||
if torch.cuda.is_available(): net_d = net_d.cuda(rank)
|
||||
optim_g = torch.optim.AdamW(
|
||||
net_g.parameters(),
|
||||
hps.train.learning_rate,
|
||||
@ -109,8 +110,12 @@ def run(rank, n_gpus, hps):
|
||||
)
|
||||
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
|
||||
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
|
||||
net_g = DDP(net_g, device_ids=[rank])
|
||||
net_d = DDP(net_d, device_ids=[rank])
|
||||
if torch.cuda.is_available():
|
||||
net_g = DDP(net_g, device_ids=[rank])
|
||||
net_d = DDP(net_d, device_ids=[rank])
|
||||
else:
|
||||
net_g = DDP(net_g)
|
||||
net_d = DDP(net_d)
|
||||
|
||||
try:#如果能加载自动resume
|
||||
_, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d) # D多半加载没事
|
||||
@ -190,11 +195,12 @@ def train_and_evaluate(
|
||||
for batch_idx, info in enumerate(train_loader):
|
||||
if (hps.if_f0 == 1):phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths,sid=info
|
||||
else:phone,phone_lengths,spec,spec_lengths,wave,wave_lengths,sid=info
|
||||
phone, phone_lengths = phone.cuda(rank, non_blocking=True),phone_lengths.cuda(rank, non_blocking=True )
|
||||
if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True)
|
||||
sid = sid.cuda(rank, non_blocking=True)
|
||||
spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
|
||||
wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True)
|
||||
if torch.cuda.is_available():
|
||||
phone, phone_lengths = phone.cuda(rank, non_blocking=True), phone_lengths.cuda(rank, non_blocking=True )
|
||||
if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True)
|
||||
sid = sid.cuda(rank, non_blocking=True)
|
||||
spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
|
||||
wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True)
|
||||
if(hps.if_cache_data_in_gpu==True):
|
||||
if (hps.if_f0 == 1):cache.append((batch_idx, (phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths ,sid)))
|
||||
else:cache.append((batch_idx, (phone,phone_lengths,spec,spec_lengths,wave,wave_lengths ,sid)))
|
||||
|
@ -1,4 +1,4 @@
|
||||
import sys,os,pdb,multiprocessing
|
||||
import sys,os,multiprocessing
|
||||
now_dir=os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
|
||||
@ -6,20 +6,15 @@ inp_root = sys.argv[1]
|
||||
sr = int(sys.argv[2])
|
||||
n_p = int(sys.argv[3])
|
||||
exp_dir = sys.argv[4]
|
||||
import numpy as np,ffmpeg,os,traceback
|
||||
noparallel = sys.argv[5] == "True"
|
||||
import numpy as np,os,traceback
|
||||
from slicer2 import Slicer
|
||||
from joblib import Parallel, delayed
|
||||
import librosa,traceback
|
||||
from scipy.io import wavfile
|
||||
import multiprocessing
|
||||
from my_utils import load_audio
|
||||
from time import sleep
|
||||
|
||||
f = open("%s/preprocess.log"%exp_dir, "a+")
|
||||
def printt(strr):
|
||||
print(strr)
|
||||
f.write("%s\n" % strr)
|
||||
f.flush()
|
||||
mutex = multiprocessing.Lock()
|
||||
|
||||
class PreProcess():
|
||||
def __init__(self,sr,exp_dir):
|
||||
@ -40,10 +35,18 @@ class PreProcess():
|
||||
self.exp_dir=exp_dir
|
||||
self.gt_wavs_dir="%s/0_gt_wavs"%exp_dir
|
||||
self.wavs16k_dir="%s/1_16k_wavs"%exp_dir
|
||||
self.f = open("%s/preprocess.log"%exp_dir, "a+")
|
||||
os.makedirs(self.exp_dir,exist_ok=True)
|
||||
os.makedirs(self.gt_wavs_dir,exist_ok=True)
|
||||
os.makedirs(self.wavs16k_dir,exist_ok=True)
|
||||
|
||||
def print(self, strr):
|
||||
mutex.acquire()
|
||||
print(strr)
|
||||
self.f.write("%s\n" % strr)
|
||||
self.f.flush()
|
||||
mutex.release()
|
||||
|
||||
def norm_write(self,tmp_audio,idx0,idx1):
|
||||
tmp_audio = (tmp_audio / np.abs(tmp_audio).max() * (self.max * self.alpha)) + (1 - self.alpha) * tmp_audio
|
||||
wavfile.write("%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1), self.sr, (tmp_audio*32768).astype(np.int16))
|
||||
@ -67,9 +70,9 @@ class PreProcess():
|
||||
tmp_audio = audio[start:]
|
||||
break
|
||||
self.norm_write(tmp_audio, idx0, idx1)
|
||||
printt("%s->Suc."%path)
|
||||
self.print("%s->Suc."%path)
|
||||
except:
|
||||
printt("%s->%s"%(path,traceback.format_exc()))
|
||||
self.print("%s->%s"%(path,traceback.format_exc()))
|
||||
|
||||
def pipeline_mp(self,infos):
|
||||
for path, idx0 in infos:
|
||||
@ -78,27 +81,24 @@ class PreProcess():
|
||||
def pipeline_mp_inp_dir(self,inp_root,n_p):
|
||||
try:
|
||||
infos = [("%s/%s" % (inp_root, name), idx) for idx, name in enumerate(sorted(list(os.listdir(inp_root))))]
|
||||
ps=[]
|
||||
for i in range(n_p):
|
||||
p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],))
|
||||
p.start()
|
||||
ps.append(p)
|
||||
for p in ps:p.join()
|
||||
if noparallel:
|
||||
for i in range(n_p): self.pipeline_mp(infos[i::n_p])
|
||||
else:
|
||||
ps=[]
|
||||
for i in range(n_p):
|
||||
p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],))
|
||||
p.start()
|
||||
ps.append(p)
|
||||
for p in ps:p.join()
|
||||
except:
|
||||
printt("Fail. %s"%traceback.format_exc())
|
||||
self.print("Fail. %s"%traceback.format_exc())
|
||||
|
||||
def preprocess_trainset(inp_root, sr, n_p, exp_dir):
|
||||
pp=PreProcess(sr,exp_dir)
|
||||
pp.print("start preprocess")
|
||||
pp.print(sys.argv)
|
||||
pp.pipeline_mp_inp_dir(inp_root,n_p)
|
||||
pp.print("end preprocess")
|
||||
|
||||
if __name__=='__main__':
|
||||
# f = open("logs/log_preprocess.log", "w")
|
||||
printt(sys.argv)
|
||||
######################################################
|
||||
# inp_root=r"E:\语音音频+标注\米津玄师\src"
|
||||
# inp_root=r"E:\codes\py39\vits_vc_gpu_train\todo-songs"
|
||||
# sr=40000
|
||||
# n_p = 6
|
||||
# exp_dir=r"E:\codes\py39\dataset\mi-test"
|
||||
|
||||
######################################################
|
||||
printt("start preprocess")
|
||||
pp=PreProcess(sr,exp_dir)
|
||||
pp.pipeline_mp_inp_dir(inp_root,n_p)
|
||||
printt("end preprocess")
|
||||
preprocess_trainset(inp_root, sr, n_p, exp_dir)
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2
uvr5_weights/.gitignore
vendored
Normal file
2
uvr5_weights/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
@ -1 +0,0 @@
|
||||
|
2
weights/.gitignore
vendored
Normal file
2
weights/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
@ -1,6 +1,7 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 liujing04
|
||||
Copyright (c) 2023 源文雨
|
||||
|
||||
本软件及其相关代码以MIT协议开源,作者不对软件具备任何控制力,使用软件者、传播软件导出的声音者自负全责。
|
||||
如不认可该条款,则不能使用或引用软件包内任何代码和文件。
|
||||
|
Loading…
Reference in New Issue
Block a user