From 03e7c68c11d766a1a2c69452d762324a5c5121f9 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Sun, 13 Aug 2023 01:05:58 +0800 Subject: [PATCH] Add files via upload --- MDXNet.py | 23 ++- config.py | 36 +++- extract_f0_rmvpe.py | 4 +- extract_f0_rmvpe_dml.py | 129 ++++++++++++ extract_feature_print.py | 25 ++- gui_v1.py | 28 +-- infer-web.py | 186 +++++++++-------- rvc_for_realtime.py | 31 ++- trainset_preprocess_pipeline_print.py | 278 +++++++++++++------------- values1.json | 1 + vc_infer_pipeline.py | 7 +- 11 files changed, 478 insertions(+), 270 deletions(-) create mode 100644 extract_f0_rmvpe_dml.py create mode 100644 values1.json diff --git a/MDXNet.py b/MDXNet.py index 19164b2..df2c672 100644 --- a/MDXNet.py +++ b/MDXNet.py @@ -1,7 +1,6 @@ import soundfile as sf import torch, pdb, os, warnings, librosa import numpy as np -import onnxruntime as ort from tqdm import tqdm import torch @@ -83,13 +82,19 @@ def get_models(device, dim_f, dim_t, n_fft): warnings.filterwarnings("ignore") +import sys +now_dir = os.getcwd() +sys.path.append(now_dir) +from config import Config + cpu = torch.device("cpu") -if torch.cuda.is_available(): - device = torch.device("cuda:0") -elif torch.backends.mps.is_available(): - device = torch.device("mps") -else: - device = torch.device("cpu") +device=Config().device +# if torch.cuda.is_available(): +# device = torch.device("cuda:0") +# elif torch.backends.mps.is_available(): +# device = torch.device("mps") +# else: +# device = torch.device("cpu") class Predictor: @@ -98,9 +103,11 @@ class Predictor: self.model_ = get_models( device=cpu, dim_f=args.dim_f, dim_t=args.dim_t, n_fft=args.n_fft ) + import onnxruntime as ort + print(ort.get_available_providers()) self.model = ort.InferenceSession( os.path.join(args.onnx, self.model_.target_name + ".onnx"), - providers=["CUDAExecutionProvider", "CPUExecutionProvider"], + providers=["CUDAExecutionProvider", "DmlExecutionProvider","CPUExecutionProvider"], ) print("onnx load done") diff --git a/config.py b/config.py index 8b7f1ff..035713a 100644 --- a/config.py +++ b/config.py @@ -36,10 +36,10 @@ class Config: self.noparallel, self.noautoopen, ) = self.arg_parse() + self.instead="" self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() - @staticmethod - def arg_parse() -> tuple: + def arg_parse(self) -> tuple: exe = sys.executable or "python" parser = argparse.ArgumentParser() parser.add_argument("--port", type=int, default=7865, help="Listen port") @@ -53,10 +53,15 @@ class Config: action="store_true", help="Do not open in browser automatically", ) + parser.add_argument( + "--dml", + action="store_true", + help="torch_dml", + ) cmd_opts = parser.parse_args() cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865 - + self.dml=cmd_opts.dml return ( cmd_opts.pycmd, cmd_opts.port, @@ -106,13 +111,13 @@ class Config: with open("trainset_preprocess_pipeline_print.py", "w") as f: f.write(strr) elif self.has_mps(): - print("No supported Nvidia GPU found, use MPS instead") - self.device = "mps" + print("No supported Nvidia GPU found") + self.device = self.instead="mps" self.is_half = False use_fp32_config() else: - print("No supported Nvidia GPU found, use CPU instead") - self.device = "cpu" + print("No supported Nvidia GPU found") + self.device = self.instead="cpu" self.is_half = False use_fp32_config() @@ -137,5 +142,20 @@ class Config: x_query = 5 x_center = 30 x_max = 32 - + if(self.dml==True): + print("use DirectML instead") + 
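+            # The renames below switch the bundled runtime between its two
+            # onnxruntime builds: the CUDA build is parked as onnxruntime-cuda
+            # and the DML build takes its place (the else branch swaps back).
+            # Each rename is wrapped in try/except so it is a no-op when the
+            # swap already happened on a previous launch.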
try:os.rename("runtime\Lib\site-packages\onnxruntime","runtime\Lib\site-packages\onnxruntime-cuda") + except:pass + try:os.rename("runtime\Lib\site-packages\onnxruntime-dml","runtime\Lib\site-packages\onnxruntime") + except:pass + import torch_directml + self.device= torch_directml.device(torch_directml.default_device()) + self.is_half=False + else: + if(self.instead): + print("use %s instead"%self.instead) + try:os.rename("runtime\Lib\site-packages\onnxruntime","runtime\Lib\site-packages\onnxruntime-cuda") + except:pass + try:os.rename("runtime\Lib\site-packages\onnxruntime-dml","runtime\Lib\site-packages\onnxruntime") + except:pass return x_pad, x_query, x_center, x_max diff --git a/extract_f0_rmvpe.py b/extract_f0_rmvpe.py index 55dd97b..00ca16c 100644 --- a/extract_f0_rmvpe.py +++ b/extract_f0_rmvpe.py @@ -36,13 +36,13 @@ class FeatureInput(object): def compute_f0(self, path, f0_method): x = load_audio(path, self.fs) - p_len = x.shape[0] // self.hop + # p_len = x.shape[0] // self.hop if f0_method == "rmvpe": if hasattr(self, "model_rmvpe") == False: from lib.rmvpe import RMVPE print("loading rmvpe model") - self.model_rmvpe = RMVPE("rmvpe.pt", is_half=True, device="cuda") + self.model_rmvpe = RMVPE("rmvpe.pt", is_half=is_half, device="cuda") f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) return f0 diff --git a/extract_f0_rmvpe_dml.py b/extract_f0_rmvpe_dml.py new file mode 100644 index 0000000..6a46fbd --- /dev/null +++ b/extract_f0_rmvpe_dml.py @@ -0,0 +1,129 @@ +import os, traceback, sys, parselmouth + +now_dir = os.getcwd() +sys.path.append(now_dir) +from lib.audio import load_audio +import pyworld +import numpy as np, logging + +logging.getLogger("numba").setLevel(logging.WARNING) + +exp_dir = sys.argv[1] +import torch_directml +device = torch_directml.device(torch_directml.default_device()) +f = open("%s/extract_f0_feature.log" % exp_dir, "a+") + + +def printt(strr): + print(strr) + f.write("%s\n" % strr) + f.flush() + + +class FeatureInput(object): + def __init__(self, samplerate=16000, hop_size=160): + self.fs = samplerate + self.hop = hop_size + + self.f0_bin = 256 + self.f0_max = 1100.0 + self.f0_min = 50.0 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + + def compute_f0(self, path, f0_method): + x = load_audio(path, self.fs) + # p_len = x.shape[0] // self.hop + if f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device=device) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + return f0 + + def coarse_f0(self, f0): + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( + self.f0_bin - 2 + ) / (self.f0_mel_max - self.f0_mel_min) + 1 + + # use 0 or 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 + f0_coarse = np.rint(f0_mel).astype(int) + assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( + f0_coarse.max(), + f0_coarse.min(), + ) + return f0_coarse + + def go(self, paths, f0_method): + if len(paths) == 0: + printt("no-f0-todo") + else: + printt("todo-f0-%s" % len(paths)) + n = max(len(paths) // 5, 1) # 每个进程最多打印5条 + for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): + try: + if idx % n == 0: + printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) + if ( + os.path.exists(opt_path1 + ".npy") == True + and os.path.exists(opt_path2 + ".npy") == True + ): + 
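+                        # both the coarse f0 (opt_path1) and the nsf f0
+                        # (opt_path2) already exist for this clip, so skip it;
+                        # this makes an interrupted extraction run resumable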
continue + featur_pit = self.compute_f0(inp_path, f0_method) + np.save( + opt_path2, + featur_pit, + allow_pickle=False, + ) # nsf + coarse_pit = self.coarse_f0(featur_pit) + np.save( + opt_path1, + coarse_pit, + allow_pickle=False, + ) # ori + except: + printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) + + +if __name__ == "__main__": + # exp_dir=r"E:\codes\py39\dataset\mi-test" + # n_p=16 + # f = open("%s/log_extract_f0.log"%exp_dir, "w") + printt(sys.argv) + featureInput = FeatureInput() + paths = [] + inp_root = "%s/1_16k_wavs" % (exp_dir) + opt_root1 = "%s/2a_f0" % (exp_dir) + opt_root2 = "%s/2b-f0nsf" % (exp_dir) + + os.makedirs(opt_root1, exist_ok=True) + os.makedirs(opt_root2, exist_ok=True) + for name in sorted(list(os.listdir(inp_root))): + inp_path = "%s/%s" % (inp_root, name) + if "spec" in inp_path: + continue + opt_path1 = "%s/%s" % (opt_root1, name) + opt_path2 = "%s/%s" % (opt_root2, name) + paths.append([inp_path, opt_path1, opt_path2]) + try: + featureInput.go(paths, "rmvpe") + except: + printt("f0_all_fail-%s" % (traceback.format_exc())) + # ps = [] + # for i in range(n_p): + # p = Process( + # target=featureInput.go, + # args=( + # paths[i::n_p], + # f0method, + # ), + # ) + # ps.append(p) + # p.start() + # for i in range(n_p): + # ps[i].join() diff --git a/extract_feature_print.py b/extract_feature_print.py index 780ffbc..e44ca04 100644 --- a/extract_feature_print.py +++ b/extract_feature_print.py @@ -3,7 +3,7 @@ import os, sys, traceback os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0" -# device=sys.argv[1] +device=sys.argv[1] n_part = int(sys.argv[2]) i_part = int(sys.argv[3]) if len(sys.argv) == 6: @@ -18,13 +18,22 @@ import torch import torch.nn.functional as F import soundfile as sf import numpy as np -from fairseq import checkpoint_utils +import fairseq -device = "cpu" -if torch.cuda.is_available(): - device = "cuda" -elif torch.backends.mps.is_available(): - device = "mps" +if("privateuseone"not in device): + device = "cpu" + if torch.cuda.is_available(): + device = "cuda" + elif torch.backends.mps.is_available(): + device = "mps" +else: + import torch_directml + device = torch_directml.device(torch_directml.default_device()) + def forward_dml(ctx, x, scale): + ctx.scale = scale + res = x.clone().detach() + return res + fairseq.modules.grad_multiply.GradMultiply.forward=forward_dml f = open("%s/extract_f0_feature.log" % exp_dir, "a+") @@ -70,7 +79,7 @@ if os.access(model_path, os.F_OK) == False: % model_path ) exit(0) -models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( +models, saved_cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task( [model_path], suffix="", ) diff --git a/gui_v1.py b/gui_v1.py index 0d45da6..3216403 100644 --- a/gui_v1.py +++ b/gui_v1.py @@ -1,5 +1,5 @@ -import os, sys - +import os, sys,pdb +os.environ["OMP_NUM_THREADS"]="2" if sys.platform == "darwin": os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" @@ -46,20 +46,21 @@ if __name__ == "__main__": import torch.nn.functional as F import torchaudio.transforms as tat from i18n import I18nAuto - + import rvc_for_realtime i18n = I18nAuto() - device = torch.device( - "cuda" - if torch.cuda.is_available() - else ("mps" if torch.backends.mps.is_available() else "cpu") - ) + device=rvc_for_realtime.config.device + # device = torch.device( + # "cuda" + # if torch.cuda.is_available() + # else ("mps" if torch.backends.mps.is_available() else "cpu") + # ) current_dir = os.getcwd() inp_q = Queue() 
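    # inp_q/opt_q feed the pool of Harvest worker processes started just
    # below: audio blocks go in on inp_q and extracted f0 comes back on
    # opt_q for the realtime pipeline.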
opt_q = Queue() n_cpu = min(cpu_count(), 8) for _ in range(n_cpu): Harvest(inp_q, opt_q).start() - from rvc_for_realtime import RVC + class GUIConfig: def __init__(self) -> None: @@ -75,7 +76,7 @@ if __name__ == "__main__": self.I_noise_reduce = False self.O_noise_reduce = False self.index_rate = 0.3 - self.n_cpu = min(n_cpu, 8) + self.n_cpu = min(n_cpu, 6) self.f0method = "harvest" self.sg_input_device = "" self.sg_output_device = "" @@ -239,7 +240,7 @@ if __name__ == "__main__": [ sg.Text(i18n("采样长度")), sg.Slider( - range=(0.12, 2.4), + range=(0.09, 2.4), key="block_time", resolution=0.03, orientation="h", @@ -271,7 +272,7 @@ if __name__ == "__main__": [ sg.Text(i18n("额外推理时长")), sg.Slider( - range=(0.05, 3.00), + range=(0.05, 5.00), key="extra_time", resolution=0.01, orientation="h", @@ -391,7 +392,7 @@ if __name__ == "__main__": def start_vc(self): torch.cuda.empty_cache() self.flag_vc = True - self.rvc = RVC( + self.rvc = rvc_for_realtime.RVC( self.config.pitch, self.config.pth_path, self.config.index_path, @@ -510,7 +511,6 @@ if __name__ == "__main__": self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata) # infer inp = torch.from_numpy(self.input_wav).to(device) - ##0 res1 = self.resampler(inp) ###55% rate1 = self.block_frame / ( diff --git a/infer-web.py b/infer-web.py index cd89f96..b2d888f 100644 --- a/infer-web.py +++ b/infer-web.py @@ -23,7 +23,7 @@ import ffmpeg import gradio as gr import soundfile as sf from config import Config -from fairseq import checkpoint_utils +import fairseq from i18n import I18nAuto from lib.infer_pack.models import ( SynthesizerTrnMs256NSFsid, @@ -43,7 +43,9 @@ logging.getLogger("numba").setLevel(logging.WARNING) now_dir = os.getcwd() tmp = os.path.join(now_dir, "TEMP") shutil.rmtree(tmp, ignore_errors=True) -shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True) +shutil.rmtree( + "%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True +) shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True) os.makedirs(tmp, exist_ok=True) os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) @@ -54,6 +56,12 @@ torch.manual_seed(114514) config = Config() +if(config.dml==True): + def forward_dml(ctx, x, scale): + ctx.scale = scale + res = x.clone().detach() + return res + fairseq.modules.grad_multiply.GradMultiply.forward=forward_dml i18n = I18nAuto() i18n.print() # 判断是否有能用来训练和加速推理的N卡 @@ -124,7 +132,7 @@ hubert_model = None def load_hubert(): global hubert_model - models, _, _ = checkpoint_utils.load_model_ensemble_and_task( + models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task( ["hubert_base.pt"], suffix="", ) @@ -443,7 +451,15 @@ def get_vc(sid, to_return_protect0, to_return_protect1): del net_g, cpt if torch.cuda.is_available(): torch.cuda.empty_cache() - return {"visible": False, "__type__": "update"} + return {"visible": False, "__type__": "update"},{ + "visible": True, + "value": to_return_protect0, + "__type__": "update", + },{ + "visible": True, + "value": to_return_protect1, + "__type__": "update", + },"","" person = "%s/%s" % (weight_root, sid) print("loading %s" % person) @@ -454,7 +470,7 @@ def get_vc(sid, to_return_protect0, to_return_protect1): if if_f0 == 0: to_return_protect0 = to_return_protect1 = { "visible": False, - "value": 0.5, + "value": 0.33, "__type__": "update", } else: @@ -488,11 +504,15 @@ def get_vc(sid, to_return_protect0, to_return_protect1): net_g = net_g.float() vc = VC(tgt_sr, config) n_spk = cpt["config"][-3] + 
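+    # Gradio widgets are refreshed by returning {"__type__": "update", ...}
+    # dicts; the same index-path update is returned twice below so that both
+    # index dropdowns wired into get_vc's outputs (file_index2, file_index4)
+    # pick up the new model's .index file when the voice changes.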
index={ + "value":get_index_path_from_model(sid), + "__type__": "update" + } return ( {"visible": True, "maximum": n_spk, "__type__": "update"}, to_return_protect0, to_return_protect1, - get_index_path_from_model(sid), + index,index ) @@ -608,58 +628,55 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp p, ), ).start() - while 1: - with open( - "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" - ) as f: - yield (f.read()) - sleep(1) - if done[0]: - break - with open( - "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" - ) as f: - log = f.read() - print(log) - yield log else: - gpus_rmvpe = gpus_rmvpe.split("-") - leng = len(gpus_rmvpe) - ps = [] - for idx, n_g in enumerate(gpus_rmvpe): - cmd = ( - config.python_cmd - + ' extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' - % (leng, idx, n_g, now_dir, exp_dir, config.is_half) + if(gpus_rmvpe!="-"): + gpus_rmvpe = gpus_rmvpe.split("-") + leng = len(gpus_rmvpe) + ps = [] + for idx, n_g in enumerate(gpus_rmvpe): + cmd = ( + config.python_cmd + + ' extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' + % (leng, idx, n_g, now_dir, exp_dir, config.is_half) + ) + print(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 + done = [False] + threading.Thread( + target=if_done_multi,# + args=( + done, + ps, + ), + ).start() + else: + cmd = config.python_cmd + ' extract_f0_rmvpe_dml.py "%s/logs/%s" ' % ( + now_dir, exp_dir ) print(cmd) p = Popen( cmd, shell=True, cwd=now_dir ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir - ps.append(p) - ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 - done = [False] - threading.Thread( - target=if_done_multi, - args=( - done, - ps, - ), - ).start() - while 1: - with open( - "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" - ) as f: - yield (f.read()) - sleep(1) - if done[0]: - break + p.wait() + done = [True] + while 1: with open( "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" ) as f: - log = f.read() - print(log) - yield log + yield (f.read()) + sleep(1) + if done[0]: + break + with open( + "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" + ) as f: + log = f.read() + print(log) + yield log ####对不同part分别开多进程 """ n_part=int(sys.argv[1]) @@ -937,8 +954,8 @@ def click_train( batch_size12, total_epoch11, save_epoch10, - "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "\b", - "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "\b", + "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", + "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", 1 if if_save_latest13 == i18n("是") else 0, 1 if if_cache_gpu17 == i18n("是") else 0, 1 if if_save_every_weights18 == i18n("是") else 0, @@ -953,7 +970,8 @@ def click_train( # but4.click(train_index, [exp_dir1], info3) def train_index(exp_dir1, version19): - exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) + # exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) + exp_dir = "logs/%s" % ( exp_dir1) os.makedirs(exp_dir, exist_ok=True) feature_dir = ( "%s/3_feature256" % (exp_dir) @@ -975,7 +993,6 @@ def train_index(exp_dir1, version19): np.random.shuffle(big_npy_idx) big_npy = big_npy[big_npy_idx] if big_npy.shape[0] > 2e5: - # if(1): infos.append("Trying doing kmeans %s shape to 10k centers." 
% big_npy.shape[0]) yield "\n".join(infos) try: @@ -1012,7 +1029,7 @@ def train_index(exp_dir1, version19): "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), ) - # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) + infos.append("adding") yield "\n".join(infos) batch_size_add = 8192 @@ -1098,23 +1115,33 @@ def train1key( p = Popen(cmd, shell=True, cwd=now_dir) p.wait() else: - gpus_rmvpe = gpus_rmvpe.split("-") - leng = len(gpus_rmvpe) - ps = [] - for idx, n_g in enumerate(gpus_rmvpe): - cmd = config.python_cmd + ' extract_f0_rmvpe.py %s %s %s "%s" %s ' % ( - leng, - idx, - n_g, - model_log_dir, - config.is_half, + if(gpus_rmvpe!="-"): + gpus_rmvpe = gpus_rmvpe.split("-") + leng = len(gpus_rmvpe) + ps = [] + for idx, n_g in enumerate(gpus_rmvpe): + cmd = config.python_cmd + ' extract_f0_rmvpe.py %s %s %s "%s" %s ' % ( + leng, + idx, + n_g, + model_log_dir, + config.is_half, + ) + yield get_info_str(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + for p in ps: + p.wait() + else:#dml + cmd = config.python_cmd + ' extract_f0_rmvpe_dml.py "%s" ' % ( + model_log_dir ) yield get_info_str(cmd) p = Popen( cmd, shell=True, cwd=now_dir ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir - ps.append(p) - for p in ps: p.wait() with open(extract_f0_feature_log_path, "r") as f: print(f.read()) @@ -1282,8 +1309,6 @@ def train1key( yield get_info_str(info) np.save("%s/total_fea.npy" % model_log_dir, big_npy) - - # n_ivf = big_npy.shape[0] // 39 n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) yield get_info_str("%s,%s" % (big_npy.shape, n_ivf)) index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) @@ -1291,11 +1316,7 @@ def train1key( index_ivf = faiss.extract_index_ivf(index) # index_ivf.nprobe = 1 index.train(big_npy) - faiss.write_index( - index, - "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" - % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), - ) + faiss.write_index(index,"%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"% (model_log_dir.replace(now_dir+"/",""), n_ivf, index_ivf.nprobe, exp_dir1, version19)) yield get_info_str("adding index") batch_size_add = 8192 for i in range(0, big_npy.shape[0], batch_size_add): @@ -1303,7 +1324,7 @@ def train1key( faiss.write_index( index, "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" - % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), + % (model_log_dir.replace(now_dir+"/",""), n_ivf, index_ivf.nprobe, exp_dir1, version19), ) yield get_info_str( "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index" @@ -1329,9 +1350,10 @@ def change_info_(ckpt_path): return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} +F0GPUVisible = config.dml == False def change_f0_method(f0method8): if f0method8 == "rmvpe_gpu": - visible = True + visible = F0GPUVisible else: visible = False return {"visible": visible, "__type__": "update"} @@ -1428,7 +1450,7 @@ with gr.Blocks(title="RVC WebUI") as app: label=i18n( "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" ), - choices=["pm", "harvest", "crepe", "rmvpe"], + choices=["pm", "harvest", "crepe", "rmvpe"]if config.dml==False else ["pm", "harvest", "rmvpe"], value="pm", interactive=True, ) @@ -1534,7 +1556,7 @@ with gr.Blocks(title="RVC WebUI") as app: label=i18n( 
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" ), - choices=["pm", "harvest", "crepe", "rmvpe"], + choices=["pm", "harvest", "crepe", "rmvpe"]if config.dml==False else ["pm", "harvest", "rmvpe"], value="pm", interactive=True, ) @@ -1643,7 +1665,7 @@ with gr.Blocks(title="RVC WebUI") as app: sid0.change( fn=get_vc, inputs=[sid0, protect0, protect1], - outputs=[spk_item, protect0, protect1, file_index2], + outputs=[spk_item, protect0, protect1, file_index2, file_index4], ) with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")): with gr.Group(): @@ -1768,13 +1790,13 @@ with gr.Blocks(title="RVC WebUI") as app: gpus6 = gr.Textbox( label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), value=gpus, - interactive=True, + interactive=True,visible=F0GPUVisible ) - gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info) + gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info,visible=F0GPUVisible) with gr.Column(): f0method8 = gr.Radio( label=i18n( - "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢" + "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢,rmvpe效果最好且微吃CPU/GPU" ), choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"], value="rmvpe_gpu", @@ -1786,7 +1808,7 @@ with gr.Blocks(title="RVC WebUI") as app: ), value="%s-%s" % (gpus, gpus), interactive=True, - visible=True, + visible=F0GPUVisible, ) but2 = gr.Button(i18n("特征提取"), variant="primary") info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) diff --git a/rvc_for_realtime.py b/rvc_for_realtime.py index da48794..3a4af04 100644 --- a/rvc_for_realtime.py +++ b/rvc_for_realtime.py @@ -1,12 +1,12 @@ +import os, sys import faiss, torch, traceback, parselmouth, numpy as np, torchcrepe, torch.nn as nn, pyworld -from fairseq import checkpoint_utils +import fairseq from lib.infer_pack.models import ( SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono, SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono, ) -import os, sys from time import time as ttime import torch.nn.functional as F import scipy.signal as signal @@ -18,8 +18,14 @@ from multiprocessing import Manager as M mm = M() config = Config() - - +if(config.dml==True): + def forward_dml(ctx, x, scale): + ctx.scale = scale + res = x.clone().detach() + return res + fairseq.modules.grad_multiply.GradMultiply.forward=forward_dml +# config.device=torch.device("cpu")########强制cpu测试 +# config.is_half=False########强制cpu测试 class RVC: def __init__( self, key, pth_path, index_path, index_rate, n_cpu, inp_q, opt_q, device @@ -31,6 +37,7 @@ class RVC: global config self.inp_q = inp_q self.opt_q = opt_q + # device="cpu"########强制cpu测试 self.device = device self.f0_up_key = key self.time_step = 160 / 16000 * 1000 @@ -46,7 +53,7 @@ class RVC: self.big_npy = self.index.reconstruct_n(0, self.index.ntotal) print("index search enabled") self.index_rate = index_rate - models, _, _ = checkpoint_utils.load_model_ensemble_and_task( + models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task( ["hubert_base.pt"], suffix="", ) @@ -80,6 +87,7 @@ class RVC: del self.net_g.enc_q print(self.net_g.load_state_dict(cpt["weight"], strict=False)) self.net_g.eval().to(device) + # print(2333333333,device,config.device,self.device)#net_g是device,hubert是config.device if config.is_half: self.net_g = self.net_g.half() else: @@ -124,7 +132,7 @@ class RVC: pad_size = (p_len - len(f0) + 1) // 2 if pad_size > 0 or p_len - len(f0) - pad_size > 0: - print(pad_size, p_len - len(f0) - pad_size) + # print(pad_size, p_len - len(f0) - pad_size) f0 = np.pad( f0, [[pad_size, p_len - len(f0) - pad_size]], 
mode="constant" ) @@ -175,7 +183,10 @@ class RVC: return self.get_f0_post(f0bak) def get_f0_crepe(self, x, f0_up_key): + if self.device.type == "privateuseone":###不支持dml,cpu又太慢用不成,拿pm顶替 + return self.get_f0(x, f0_up_key,1,"pm") audio = torch.tensor(np.copy(x))[None].float() + # print("using crepe,device:%s"%self.device) f0, pd = torchcrepe.predict( audio, self.sr, @@ -184,6 +195,7 @@ class RVC: self.f0_max, "full", batch_size=512, + # device=self.device if self.device.type!="privateuseone" else "cpu",###crepe不用半精度全部是全精度所以不愁###cpu延迟高到没法用 device=self.device, return_periodicity=True, ) @@ -197,10 +209,12 @@ class RVC: def get_f0_rmvpe(self, x, f0_up_key): if hasattr(self, "model_rmvpe") == False: from lib.rmvpe import RMVPE - print("loading rmvpe model") self.model_rmvpe = RMVPE( - "rmvpe.pt", is_half=self.is_half, device=self.device + # "rmvpe.pt", is_half=self.is_half if self.device.type!="privateuseone" else False, device=self.device if self.device.type!="privateuseone"else "cpu"####dml时强制对rmvpe用cpu跑 + # "rmvpe.pt", is_half=False, device=self.device####dml配置 + # "rmvpe.pt", is_half=False, device="cpu"####锁定cpu配置 + "rmvpe.pt", is_half=self.is_half, device=self.device####正常逻辑 ) # self.model_rmvpe = RMVPE("aug2_58000_half.pt", is_half=self.is_half, device=self.device) f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) @@ -279,6 +293,7 @@ class RVC: sid = torch.LongTensor([ii]).to(self.device) with torch.no_grad(): if self.if_f0 == 1: + # print(12222222222,feats.device,p_len.device,cache_pitch.device,cache_pitchf.device,sid.device,rate2) infered_audio = ( self.net_g.infer( feats, p_len, cache_pitch, cache_pitchf, sid, rate2 diff --git a/trainset_preprocess_pipeline_print.py b/trainset_preprocess_pipeline_print.py index 62671ba..d9088a2 100644 --- a/trainset_preprocess_pipeline_print.py +++ b/trainset_preprocess_pipeline_print.py @@ -1,139 +1,139 @@ -import sys, os, multiprocessing -from scipy import signal - -now_dir = os.getcwd() -sys.path.append(now_dir) -print(sys.argv) -inp_root = sys.argv[1] -sr = int(sys.argv[2]) -n_p = int(sys.argv[3]) -exp_dir = sys.argv[4] -noparallel = sys.argv[5] == "True" -import numpy as np, os, traceback -from lib.slicer2 import Slicer -import librosa, traceback -from scipy.io import wavfile -import multiprocessing -from lib.audio import load_audio - -mutex = multiprocessing.Lock() -f = open("%s/preprocess.log" % exp_dir, "a+") - - -def println(strr): - mutex.acquire() - print(strr) - f.write("%s\n" % strr) - f.flush() - mutex.release() - - -class PreProcess: - def __init__(self, sr, exp_dir): - self.slicer = Slicer( - sr=sr, - threshold=-42, - min_length=1500, - min_interval=400, - hop_size=15, - max_sil_kept=500, - ) - self.sr = sr - self.bh, self.ah = signal.butter(N=5, Wn=48, btype="high", fs=self.sr) - self.per = 3.0 - self.overlap = 0.3 - self.tail = self.per + self.overlap - self.max = 0.9 - self.alpha = 0.75 - self.exp_dir = exp_dir - self.gt_wavs_dir = "%s/0_gt_wavs" % exp_dir - self.wavs16k_dir = "%s/1_16k_wavs" % exp_dir - os.makedirs(self.exp_dir, exist_ok=True) - os.makedirs(self.gt_wavs_dir, exist_ok=True) - os.makedirs(self.wavs16k_dir, exist_ok=True) - - def norm_write(self, tmp_audio, idx0, idx1): - tmp_max = np.abs(tmp_audio).max() - if tmp_max > 2.5: - print("%s-%s-%s-filtered" % (idx0, idx1, tmp_max)) - return - tmp_audio = (tmp_audio / tmp_max * (self.max * self.alpha)) + ( - 1 - self.alpha - ) * tmp_audio - wavfile.write( - "%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1), - self.sr, - tmp_audio.astype(np.float32), - ) - tmp_audio = 
librosa.resample( - tmp_audio, orig_sr=self.sr, target_sr=16000 - ) # , res_type="soxr_vhq" - wavfile.write( - "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1), - 16000, - tmp_audio.astype(np.float32), - ) - - def pipeline(self, path, idx0): - try: - audio = load_audio(path, self.sr) - # zero phased digital filter cause pre-ringing noise... - # audio = signal.filtfilt(self.bh, self.ah, audio) - audio = signal.lfilter(self.bh, self.ah, audio) - - idx1 = 0 - for audio in self.slicer.slice(audio): - i = 0 - while 1: - start = int(self.sr * (self.per - self.overlap) * i) - i += 1 - if len(audio[start:]) > self.tail * self.sr: - tmp_audio = audio[start : start + int(self.per * self.sr)] - self.norm_write(tmp_audio, idx0, idx1) - idx1 += 1 - else: - tmp_audio = audio[start:] - idx1 += 1 - break - self.norm_write(tmp_audio, idx0, idx1) - println("%s->Suc." % path) - except: - println("%s->%s" % (path, traceback.format_exc())) - - def pipeline_mp(self, infos): - for path, idx0 in infos: - self.pipeline(path, idx0) - - def pipeline_mp_inp_dir(self, inp_root, n_p): - try: - infos = [ - ("%s/%s" % (inp_root, name), idx) - for idx, name in enumerate(sorted(list(os.listdir(inp_root)))) - ] - if noparallel: - for i in range(n_p): - self.pipeline_mp(infos[i::n_p]) - else: - ps = [] - for i in range(n_p): - p = multiprocessing.Process( - target=self.pipeline_mp, args=(infos[i::n_p],) - ) - ps.append(p) - p.start() - for i in range(n_p): - ps[i].join() - except: - println("Fail. %s" % traceback.format_exc()) - - -def preprocess_trainset(inp_root, sr, n_p, exp_dir): - pp = PreProcess(sr, exp_dir) - println("start preprocess") - println(sys.argv) - pp.pipeline_mp_inp_dir(inp_root, n_p) - println("end preprocess") - - -if __name__ == "__main__": - preprocess_trainset(inp_root, sr, n_p, exp_dir) +import sys, os, multiprocessing +from scipy import signal + +now_dir = os.getcwd() +sys.path.append(now_dir) +print(sys.argv) +inp_root = sys.argv[1] +sr = int(sys.argv[2]) +n_p = int(sys.argv[3]) +exp_dir = sys.argv[4] +noparallel = sys.argv[5] == "True" +import numpy as np, os, traceback +from lib.slicer2 import Slicer +import librosa, traceback +from scipy.io import wavfile +import multiprocessing +from lib.audio import load_audio + +mutex = multiprocessing.Lock() +f = open("%s/preprocess.log" % exp_dir, "a+") + + +def println(strr): + mutex.acquire() + print(strr) + f.write("%s\n" % strr) + f.flush() + mutex.release() + + +class PreProcess: + def __init__(self, sr, exp_dir): + self.slicer = Slicer( + sr=sr, + threshold=-42, + min_length=1500, + min_interval=400, + hop_size=15, + max_sil_kept=500, + ) + self.sr = sr + self.bh, self.ah = signal.butter(N=5, Wn=48, btype="high", fs=self.sr) + self.per = 3.0 + self.overlap = 0.3 + self.tail = self.per + self.overlap + self.max = 0.9 + self.alpha = 0.75 + self.exp_dir = exp_dir + self.gt_wavs_dir = "%s/0_gt_wavs" % exp_dir + self.wavs16k_dir = "%s/1_16k_wavs" % exp_dir + os.makedirs(self.exp_dir, exist_ok=True) + os.makedirs(self.gt_wavs_dir, exist_ok=True) + os.makedirs(self.wavs16k_dir, exist_ok=True) + + def norm_write(self, tmp_audio, idx0, idx1): + tmp_max = np.abs(tmp_audio).max() + if tmp_max > 2.5: + print("%s-%s-%s-filtered" % (idx0, idx1, tmp_max)) + return + tmp_audio = (tmp_audio / tmp_max * (self.max * self.alpha)) + ( + 1 - self.alpha + ) * tmp_audio + wavfile.write( + "%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1), + self.sr, + tmp_audio.astype(np.float32), + ) + tmp_audio = librosa.resample( + tmp_audio, orig_sr=self.sr, target_sr=16000 + ) # , 
res_type="soxr_vhq" + wavfile.write( + "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1), + 16000, + tmp_audio.astype(np.float32), + ) + + def pipeline(self, path, idx0): + try: + audio = load_audio(path, self.sr) + # zero phased digital filter cause pre-ringing noise... + # audio = signal.filtfilt(self.bh, self.ah, audio) + audio = signal.lfilter(self.bh, self.ah, audio) + + idx1 = 0 + for audio in self.slicer.slice(audio): + i = 0 + while 1: + start = int(self.sr * (self.per - self.overlap) * i) + i += 1 + if len(audio[start:]) > self.tail * self.sr: + tmp_audio = audio[start : start + int(self.per * self.sr)] + self.norm_write(tmp_audio, idx0, idx1) + idx1 += 1 + else: + tmp_audio = audio[start:] + idx1 += 1 + break + self.norm_write(tmp_audio, idx0, idx1) + println("%s->Suc." % path) + except: + println("%s->%s" % (path, traceback.format_exc())) + + def pipeline_mp(self, infos): + for path, idx0 in infos: + self.pipeline(path, idx0) + + def pipeline_mp_inp_dir(self, inp_root, n_p): + try: + infos = [ + ("%s/%s" % (inp_root, name), idx) + for idx, name in enumerate(sorted(list(os.listdir(inp_root)))) + ] + if noparallel: + for i in range(n_p): + self.pipeline_mp(infos[i::n_p]) + else: + ps = [] + for i in range(n_p): + p = multiprocessing.Process( + target=self.pipeline_mp, args=(infos[i::n_p],) + ) + ps.append(p) + p.start() + for i in range(n_p): + ps[i].join() + except: + println("Fail. %s" % traceback.format_exc()) + + +def preprocess_trainset(inp_root, sr, n_p, exp_dir): + pp = PreProcess(sr, exp_dir) + println("start preprocess") + println(sys.argv) + pp.pipeline_mp_inp_dir(inp_root, n_p) + println("end preprocess") + + +if __name__ == "__main__": + preprocess_trainset(inp_root, sr, n_p, exp_dir) diff --git a/values1.json b/values1.json new file mode 100644 index 0000000..64caaff --- /dev/null +++ b/values1.json @@ -0,0 +1 @@ +{"pth_path": "weights/kikiV1.pth", "index_path": "logs/kikiV1.index", "sg_input_device": "VoiceMeeter Output (VB-Audio Vo (MME)", "sg_output_device": "VoiceMeeter Aux Input (VB-Audio (MME)", "threhold": -45.0, "pitch": 0.0, "index_rate": 1.0, "block_time": 0.09, "crossfade_length": 0.15, "extra_time": 5.0, "n_cpu": 8.0, "f0method": "rmvpe"} \ No newline at end of file diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py index 7b3fe86..87ab2ef 100644 --- a/vc_infer_pipeline.py +++ b/vc_infer_pipeline.py @@ -130,12 +130,17 @@ class VC(object): elif f0_method == "rmvpe": if hasattr(self, "model_rmvpe") == False: from lib.rmvpe import RMVPE - print("loading rmvpe model") self.model_rmvpe = RMVPE( "rmvpe.pt", is_half=self.is_half, device=self.device ) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + if("privateuseone"in str(self.device)):#clean ortruntime memory + del self.model_rmvpe.model + del self.model_rmvpe + print("cleaning ortruntime memory") + f0 *= pow(2, f0_up_key / 12) # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) tf0 = self.sr // self.window # 每秒f0点数
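---

Notes on the DirectML path introduced by this patch:

Device selection. The patch replaces the per-file cuda/mps/cpu probing with the shared Config().device and adds a --dml flag that routes everything through torch_directml. A minimal sketch of the resulting priority order (the helper name pick_device is illustrative, not part of the repo):

import torch

def pick_device(use_dml: bool = False):
    if use_dml:
        # Windows-only wheel; DML tensors report device.type == "privateuseone",
        # which is what the runtime checks scattered through this patch test for.
        import torch_directml
        return torch_directml.device(torch_directml.default_device())
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")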
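fairseq monkey-patch. The same three-line override appears in extract_feature_print.py, infer-web.py and rvc_for_realtime.py whenever the DML path is active, shown once here for reference. GradMultiply exists only to rescale gradients during training, so for inference an identity forward that returns a detached copy is behaviorally equivalent (and, presumably, sidesteps an op the original implementation needs that DirectML lacks):

import fairseq

def forward_dml(ctx, x, scale):
    ctx.scale = scale
    return x.clone().detach()

fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml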
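Index training. train_index and train1key share the same faiss recipe; a self-contained sketch, assuming big_npy is the (N, 256|768) float32 feature matrix the surrounding code loads (file paths here are hypothetical examples):

import faiss
import numpy as np

big_npy = np.load("logs/example/total_fea.npy")  # hypothetical example path
dim = big_npy.shape[1]  # 256 for v1 models, 768 for v2
n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
index = faiss.index_factory(dim, "IVF%s,Flat" % n_ivf)
index.train(big_npy)                        # IVF must be trained before adding
for i in range(0, big_npy.shape[0], 8192):  # add in chunks to bound memory use
    index.add(big_npy[i : i + 8192])
faiss.write_index(index, "logs/example/added.index")  # hypothetical name

Note the patch now writes these .index files with paths relative to the repo root (model_log_dir.replace(now_dir + "/", "")), presumably so the stored paths stay portable.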
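Preprocess normalization. trainset_preprocess_pipeline_print.py is re-uploaded with every line rewritten (apparently a line-ending change; the logic is unchanged). Its norm_write does a soft peak normalization worth spelling out: with max=0.9 and alpha=0.75 the output is a 75/25 blend of the peak-normalized signal and the raw one. A worked example:

import numpy as np

x = np.array([0.5, -0.25, 0.1], dtype=np.float32)
peak = np.abs(x).max()                        # 0.5
y = x / peak * (0.9 * 0.75) + (1 - 0.75) * x  # == [0.8, -0.4, 0.16]
# the normalized term dominates, but 25% of the original amplitude is kept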
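DML caveats encoded in the patch: torchcrepe does not run on privateuseone devices, so get_f0_crepe in rvc_for_realtime.py falls back to the "pm" method; vc_infer_pipeline.py deletes its RMVPE model right after inference on DML ("cleaning ortruntime memory") to release memory at the cost of reloading it on the next call; and the WebUI hides the rmvpe_gpu card-number controls and the crepe choice when config.dml is set (F0GPUVisible).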
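f0 extraction dispatch. In extract_f0_feature and train1key the card list "-" acts as a sentinel: any other value splits rmvpe extraction across the listed CUDA cards via extract_f0_rmvpe.py, while "-" launches the new single-process extract_f0_rmvpe_dml.py, which runs RMVPE at full precision (is_half=False) on the torch_directml device. extract_f0_rmvpe.py itself also stops hard-coding is_half=True and now honors the value passed on the command line.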