Merge pull request #4 from fumiama/main

增加colab笔记本以方便使用
This commit is contained in:
liujing04 2023-04-01 20:19:20 +08:00 committed by GitHub
commit 4b039b098b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 343 additions and 86 deletions

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
.DS_Store
__pycache__
/TEMP
*.pyd
hubert_base.pt
/logs

View File

@ -1,5 +1,7 @@
# Retrieval-based-Voice-Conversion-WebUI
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
缺失的2个文件夹和2个文件
hubert_base.pt

View File

@ -0,0 +1,211 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GmFP6bN9dvOq"
},
"outputs": [],
"source": [
"#@title 查看显卡\n",
"!nvidia-smi"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "wjddIFr1oS3W"
},
"outputs": [],
"source": [
"#@title 安装依赖\n",
"!apt-get -y install build-essential python3-dev ffmpeg\n",
"!pip3 install --upgrade setuptools wheel\n",
"!pip3 install --upgrade pip\n",
"!pip3 install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ge_97mfpgqTm"
},
"outputs": [],
"source": [
"#@title 克隆仓库\n",
"\n",
"!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
"!mkdir -p pretrained uvr5_weights"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BLDEZADkvlw1"
},
"outputs": [],
"source": [
"#@title 更新仓库(一般无需执行)\n",
"!git pull"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UG3XpUwEomUz"
},
"outputs": [],
"source": [
"!apt -y install -qq aria2\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
"\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
"\n",
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Mwk7Q0Loqzjx"
},
"outputs": [],
"source": [
"#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
"\n",
"#@markdown 数据集位置\n",
"DATASET = \"/content/drive/MyDrive/dataset/lulu20230327.zip\" #@param {type:\"string\"}\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"!mkdir -p /content/dataset\n",
"!unzip -d /content/dataset {DATASET}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7vh6vphDwO0b"
},
"outputs": [],
"source": [
"#@title 启动web\n",
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
"!python3 infer-web.py --colab --pycmd python3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FgJuNeAwx5Y_"
},
"outputs": [],
"source": [
"#@title 手动将训练后的模型文件备份到谷歌云盘\n",
"#@markdown 需要自己查看logs文件夹下模型的文件名手动修改下方命令末尾的文件名\n",
"\n",
"#@markdown 模型名\n",
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
"#@markdown 模型epoch\n",
"MODELEPOCH = 3540 #@param {type:\"integer\"}\n",
"\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
"\n",
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "OVQoLQJXS7WX"
},
"outputs": [],
"source": [
"#@title 从谷歌云盘恢复pth\n",
"#@markdown 需要自己查看logs文件夹下模型的文件名手动修改下方命令末尾的文件名\n",
"\n",
"#@markdown 模型名\n",
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
"#@markdown 模型epoch\n",
"MODELEPOCH = 730 #@param {type:\"integer\"}\n",
"\n",
"!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
"!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZKAyuKb9J6dz"
},
"outputs": [],
"source": [
"#@title 手动训练(不推荐)\n",
"#@markdown 模型名\n",
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
"\n",
"!python3 trainset_preprocess_pipeline_print.py /content/dataset 32000 8 logs/{MODELNAME} True\n",
"\n",
"!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n",
"\n",
"!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr 32k -f0 0 -g 0 -bs 4 -te 10 -se 5 -pg pretrained/G32k.pth -pd pretrained/D32k.pth -l 0 -c 0\n"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"private_outputs": true,
"provenance": []
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -1,3 +1,10 @@
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--port", type=int, default=7865, help="Listen port")
parser.add_argument("--pycmd", type=str, default="python", help="Python command")
parser.add_argument("--colab", action='store_true', help="Launch in colab")
parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing")
cmd_opts = parser.parse_args()
############离线VC参数
inp_root=r"白鹭霜华长条"#对输入目录下所有音频进行转换,别放非音频文件
opt_root=r"opt"#输出目录
@ -7,10 +14,15 @@ person=r"weights\洛天依v3.pt"#目前只有洛天依v3
device = "cuda:0"#填写cuda:x或cpux指代第几张卡只支持N卡加速
is_half=True#9-10-20-30-40系显卡无脑True不影响质量>=20显卡开启有加速
n_cpu=0#默认0用上所有线程写数字限制CPU资源使用
############python命令路径
python_cmd=cmd_opts.pycmd
listen_port=cmd_opts.port
iscolab=cmd_opts.colab
noparallel=cmd_opts.noparallel
############下头别动
import torch
if(torch.cuda.is_available()==False):
print("没有发现支持的N卡使用CPU进行推理")
print("没有发现支持的N卡, 使用CPU进行推理")
device="cpu"
is_half=False
if(device!="cpu"):

View File

@ -1,17 +1,20 @@
import os,sys,traceback
n_part=int(sys.argv[1])
i_part=int(sys.argv[2])
i_gpu=sys.argv[3]
exp_dir=sys.argv[4]
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
if len(sys.argv) == 4:
n_part=int(sys.argv[1])
i_part=int(sys.argv[2])
exp_dir=sys.argv[3]
else:
n_part=int(sys.argv[1])
i_part=int(sys.argv[2])
i_gpu=sys.argv[3]
exp_dir=sys.argv[4]
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
import torch
import torch.nn.functional as F
import soundfile as sf
import numpy as np
import joblib
from fairseq import checkpoint_utils
import pdb
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
f = open("%s/extract_f0_feature.log"%exp_dir, "a+")
@ -48,7 +51,8 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
)
model = models[0]
model = model.to(device)
model = model.half()
if torch.cuda.is_available():
model = model.half()
model.eval()
todo=sorted(list(os.listdir(wavPath)))[i_part::n_part]
@ -67,7 +71,7 @@ else:
feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
padding_mask = torch.BoolTensor(feats.shape).fill_(False)
inputs = {
"source": feats.half().to(device),
"source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device),
"padding_mask": padding_mask.to(device),
"output_layer": 9, # layer 9
}

View File

@ -1,9 +1,10 @@
from multiprocessing import cpu_count
import threading
from time import sleep
from subprocess import Popen,PIPE,run as runn
from subprocess import Popen
from time import sleep
import torch, pdb, os,traceback,sys,warnings,shutil,numpy as np,faiss
import torch, os,traceback,sys,warnings,shutil,numpy as np
import faiss
#判断是否有能用来训练和加速推理的N卡
ncpu=cpu_count()
ngpu=torch.cuda.device_count()
@ -33,11 +34,9 @@ from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFs
from scipy.io import wavfile
from fairseq import checkpoint_utils
import gradio as gr
import librosa
import logging
from vc_infer_pipeline import VC
import soundfile as sf
from config import is_half,device,is_half
from config import is_half,device,is_half,python_cmd,listen_port,iscolab,noparallel
from infer_uvr5 import _audio_pre_
from my_utils import load_audio
from train.process_ckpt import show_info,change_info,merge,extract_small_model
@ -64,9 +63,11 @@ def load_hubert():
weight_root="weights"
weight_uvr5_root="uvr5_weights"
names=[]
for name in os.listdir(weight_root):names.append(name)
for name in os.listdir(weight_root):
if name.endswith(".pth"): names.append(name)
uvr5_names=[]
for name in os.listdir(weight_uvr5_root):uvr5_names.append(name.replace(".pth",""))
for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth"): uvr5_names.append(name.replace(".pth",""))
def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_npy,index_rate):#spk_item, input_audio0, vc_transform0,f0_file,f0method0
global tgt_sr,net_g,vc,hubert_model
@ -180,7 +181,11 @@ def get_vc(sid):
n_spk=cpt["config"][-3]
return {"visible": True,"maximum": n_spk, "__type__": "update"}
def change_choices():return {"choices": sorted(list(os.listdir(weight_root))), "__type__": "update"}
def change_choices():
names=[]
for name in os.listdir(weight_root):
if name.endswith(".pth"): names.append(name)
return {"choices": sorted(names), "__type__": "update"}
def clean():return {"value": "", "__type__": "update"}
def change_f0(if_f0_3,sr2):#np7, f0method8,pretrained_G14,pretrained_D15
if(if_f0_3==""):return {"visible": True, "__type__": "update"},{"visible": True, "__type__": "update"},"pretrained/f0G%s.pth"%sr2,"pretrained/f0D%s.pth"%sr2
@ -217,7 +222,7 @@ def preprocess_dataset(trainset_dir,exp_dir,sr,n_p=ncpu):
os.makedirs("%s/logs/%s"%(now_dir,exp_dir),exist_ok=True)
f = open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir), "w")
f.close()
cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir,sr,n_p,now_dir,exp_dir)
cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir,sr,n_p,now_dir,exp_dir)+str(noparallel)
print(cmd)
p = Popen(cmd, shell=True)#, stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
###煞笔grpopen read都非得全跑完了再一次性读取不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
@ -237,7 +242,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir):
f = open("%s/logs/%s/extract_f0_feature.log"%(now_dir,exp_dir), "w")
f.close()
if(if_f0==""):
cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method)
cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method)
print(cmd)
p = Popen(cmd, shell=True,cwd=now_dir)#, stdin=PIPE, stdout=PIPE,stderr=PIPE
###煞笔grpopen read都非得全跑完了再一次性读取不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
@ -261,7 +266,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir):
leng=len(gpus)
ps=[]
for idx,n_g in enumerate(gpus):
cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
print(cmd)
p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
ps.append(p)
@ -300,8 +305,12 @@ def click_train(exp_dir1,sr2,if_f0_3,spk_id5,save_epoch10,total_epoch11,batch_si
with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt))
print("write filelist done")
#生成config#无需生成config
# cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3==""else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13==""else 0,1 if if_cache_gpu17==""else 0)
# cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
print("use gpus:",gpus16)
if gpus16:
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3==""else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13==""else 0,1 if if_cache_gpu17==""else 0)
else:
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3==""else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13==""else 0,1 if if_cache_gpu17==""else 0)
print(cmd)
p = Popen(cmd, shell=True, cwd=now_dir)
p.wait()
@ -346,7 +355,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
os.makedirs("%s/logs/%s"%(now_dir,exp_dir1),exist_ok=True)
#########step1:处理数据
open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir1), "w").close()
cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1)
cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1)+str(noparallel)
yield get_info_str("step1:正在处理数据")
yield get_info_str(cmd)
p = Popen(cmd, shell=True)
@ -356,7 +365,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir1), "w")
if(if_f0_3==""):
yield get_info_str("step2a:正在提取音高")
cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8)
cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8)
yield get_info_str(cmd)
p = Popen(cmd, shell=True,cwd=now_dir)
p.wait()
@ -368,7 +377,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
leng=len(gpus)
ps=[]
for idx,n_g in enumerate(gpus):
cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
yield get_info_str(cmd)
p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
ps.append(p)
@ -394,7 +403,10 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
opt.append("%s/%s.wav|%s/%s.npy|%s"%(gt_wavs_dir.replace("\\","\\\\"),name,co256_dir.replace("\\","\\\\"),name,spk_id5))
with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt))
yield get_info_str("write filelist done")
cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3==""else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13==""else 0,1 if if_cache_gpu17==""else 0)
if gpus16:
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3==""else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13==""else 0,1 if if_cache_gpu17==""else 0)
else:
cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3==""else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13==""else 0,1 if if_cache_gpu17==""else 0)
yield get_info_str(cmd)
p = Popen(cmd, shell=True, cwd=now_dir)
p.wait()
@ -443,7 +455,7 @@ with gr.Blocks() as app:
with gr.Tabs():
with gr.TabItem("模型推理"):
with gr.Row():
sid0 = gr.Dropdown(label="推理音色", choices=names)
sid0 = gr.Dropdown(label="推理音色", choices=sorted(names))
refresh_button = gr.Button("刷新音色列表", variant="primary")
refresh_button.click(
fn=change_choices,
@ -625,6 +637,7 @@ with gr.Blocks() as app:
with gr.TabItem("点击查看交流、问题反馈群号"):
gr.Markdown(value="""xxxxx""")
# app.launch(server_name="0.0.0.0",server_port=7860)
# app.queue(concurrency_count=511, max_size=1022).launch(server_name="127.0.0.1",inbrowser=True,server_port=7861,quiet=True)
app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=7865,quiet=True)
if iscolab:
app.queue(concurrency_count=511, max_size=1022).launch(share=True)
else:
app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=listen_port,quiet=True)

View File

@ -1,4 +1,5 @@
import ffmpeg,numpy as np
import ffmpeg
import numpy as np
def load_audio(file,sr):
try:
# https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
@ -7,12 +8,9 @@ def load_audio(file,sr):
out, _ = (
ffmpeg.input(file, threads=0)
.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
.run(cmd=["./ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
)
except ffmpeg.Error as e:
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
except Exception as e:
raise RuntimeError(f"Failed to load audio: {e}")
return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
if __name__=='__main__' :
print(load_audio(r"C:\CloudMusic\宮野幸子,森下唯 - 月夜に謳う君 -LUNA-.mp3",16000).shape)

2
pretrained/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -4,7 +4,6 @@ import numpy as np
# This function is obtained from librosa.
def get_rms(
y,
*,
frame_length=2048,
hop_length=512,
pad_mode="constant",

View File

@ -34,9 +34,6 @@ global_step = 0
def main():
"""Assume Single Node Multi GPUs Training Only"""
assert torch.cuda.is_available(), "CPU training is not allowed."
# n_gpus = torch.cuda.device_count()
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "5555"
@ -65,7 +62,7 @@ def run(rank, n_gpus, hps):
backend="gloo", init_method="env://", world_size=n_gpus, rank=rank
)
torch.manual_seed(hps.train.seed)
torch.cuda.set_device(rank)
if torch.cuda.is_available(): torch.cuda.set_device(rank)
if (hps.if_f0 == 1):train_dataset = TextAudioLoaderMultiNSFsid(hps.data.training_files, hps.data)
else:train_dataset = TextAudioLoader(hps.data.training_files, hps.data)
@ -92,9 +89,13 @@ def run(rank, n_gpus, hps):
persistent_workers=True,
prefetch_factor=8,
)
if(hps.if_f0==1):net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate).cuda(rank)
else:net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run).cuda(rank)
net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
if(hps.if_f0==1):
net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate)
else:
net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run)
if torch.cuda.is_available(): net_g = net_g.cuda(rank)
net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
if torch.cuda.is_available(): net_d = net_d.cuda(rank)
optim_g = torch.optim.AdamW(
net_g.parameters(),
hps.train.learning_rate,
@ -109,8 +110,12 @@ def run(rank, n_gpus, hps):
)
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
if torch.cuda.is_available():
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
else:
net_g = DDP(net_g)
net_d = DDP(net_d)
try:#如果能加载自动resume
_, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d) # D多半加载没事
@ -190,11 +195,12 @@ def train_and_evaluate(
for batch_idx, info in enumerate(train_loader):
if (hps.if_f0 == 1):phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths,sid=info
else:phone,phone_lengths,spec,spec_lengths,wave,wave_lengths,sid=info
phone, phone_lengths = phone.cuda(rank, non_blocking=True),phone_lengths.cuda(rank, non_blocking=True )
if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True)
sid = sid.cuda(rank, non_blocking=True)
spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True)
if torch.cuda.is_available():
phone, phone_lengths = phone.cuda(rank, non_blocking=True), phone_lengths.cuda(rank, non_blocking=True )
if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True)
sid = sid.cuda(rank, non_blocking=True)
spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True)
if(hps.if_cache_data_in_gpu==True):
if (hps.if_f0 == 1):cache.append((batch_idx, (phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths ,sid)))
else:cache.append((batch_idx, (phone,phone_lengths,spec,spec_lengths,wave,wave_lengths ,sid)))

View File

@ -1,4 +1,4 @@
import sys,os,pdb,multiprocessing
import sys,os,multiprocessing
now_dir=os.getcwd()
sys.path.append(now_dir)
@ -6,20 +6,15 @@ inp_root = sys.argv[1]
sr = int(sys.argv[2])
n_p = int(sys.argv[3])
exp_dir = sys.argv[4]
import numpy as np,ffmpeg,os,traceback
noparallel = sys.argv[5] == "True"
import numpy as np,os,traceback
from slicer2 import Slicer
from joblib import Parallel, delayed
import librosa,traceback
from scipy.io import wavfile
import multiprocessing
from my_utils import load_audio
from time import sleep
f = open("%s/preprocess.log"%exp_dir, "a+")
def printt(strr):
print(strr)
f.write("%s\n" % strr)
f.flush()
mutex = multiprocessing.Lock()
class PreProcess():
def __init__(self,sr,exp_dir):
@ -40,10 +35,18 @@ class PreProcess():
self.exp_dir=exp_dir
self.gt_wavs_dir="%s/0_gt_wavs"%exp_dir
self.wavs16k_dir="%s/1_16k_wavs"%exp_dir
self.f = open("%s/preprocess.log"%exp_dir, "a+")
os.makedirs(self.exp_dir,exist_ok=True)
os.makedirs(self.gt_wavs_dir,exist_ok=True)
os.makedirs(self.wavs16k_dir,exist_ok=True)
def print(self, strr):
mutex.acquire()
print(strr)
self.f.write("%s\n" % strr)
self.f.flush()
mutex.release()
def norm_write(self,tmp_audio,idx0,idx1):
tmp_audio = (tmp_audio / np.abs(tmp_audio).max() * (self.max * self.alpha)) + (1 - self.alpha) * tmp_audio
wavfile.write("%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1), self.sr, (tmp_audio*32768).astype(np.int16))
@ -67,9 +70,9 @@ class PreProcess():
tmp_audio = audio[start:]
break
self.norm_write(tmp_audio, idx0, idx1)
printt("%s->Suc."%path)
self.print("%s->Suc."%path)
except:
printt("%s->%s"%(path,traceback.format_exc()))
self.print("%s->%s"%(path,traceback.format_exc()))
def pipeline_mp(self,infos):
for path, idx0 in infos:
@ -78,27 +81,24 @@ class PreProcess():
def pipeline_mp_inp_dir(self,inp_root,n_p):
try:
infos = [("%s/%s" % (inp_root, name), idx) for idx, name in enumerate(sorted(list(os.listdir(inp_root))))]
ps=[]
for i in range(n_p):
p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],))
p.start()
ps.append(p)
for p in ps:p.join()
if noparallel:
for i in range(n_p): self.pipeline_mp(infos[i::n_p])
else:
ps=[]
for i in range(n_p):
p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],))
p.start()
ps.append(p)
for p in ps:p.join()
except:
printt("Fail. %s"%traceback.format_exc())
self.print("Fail. %s"%traceback.format_exc())
def preprocess_trainset(inp_root, sr, n_p, exp_dir):
pp=PreProcess(sr,exp_dir)
pp.print("start preprocess")
pp.print(sys.argv)
pp.pipeline_mp_inp_dir(inp_root,n_p)
pp.print("end preprocess")
if __name__=='__main__':
# f = open("logs/log_preprocess.log", "w")
printt(sys.argv)
######################################################
# inp_root=r"E:\语音音频+标注\米津玄师\src"
# inp_root=r"E:\codes\py39\vits_vc_gpu_train\todo-songs"
# sr=40000
# n_p = 6
# exp_dir=r"E:\codes\py39\dataset\mi-test"
######################################################
printt("start preprocess")
pp=PreProcess(sr,exp_dir)
pp.pipeline_mp_inp_dir(inp_root,n_p)
printt("end preprocess")
preprocess_trainset(inp_root, sr, n_p, exp_dir)

2
uvr5_weights/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -1 +0,0 @@

2
weights/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -1,6 +1,7 @@
MIT License
Copyright (c) 2023 liujing04
Copyright (c) 2023 源文雨
本软件及其相关代码以MIT协议开源作者不对软件具备任何控制力使用软件者、传播软件导出的声音者自负全责。
如不认可该条款,则不能使用或引用软件包内任何代码和文件。