diff --git a/extract_f0_print.py b/extract_f0_print.py index f96606c..0f6f5a8 100644 --- a/extract_f0_print.py +++ b/extract_f0_print.py @@ -74,6 +74,15 @@ class FeatureInput(object): frame_period=1000 * self.hop / self.fs, ) f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs) + elif f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE( + "rmvpe.pt", is_half=False, device="cpu" + ) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) return f0 def coarse_f0(self, f0): diff --git a/extract_f0_rmvpe.py b/extract_f0_rmvpe.py new file mode 100644 index 0000000..a0c9bba --- /dev/null +++ b/extract_f0_rmvpe.py @@ -0,0 +1,135 @@ +import os, traceback, sys, parselmouth + +now_dir = os.getcwd() +sys.path.append(now_dir) +from lib.audio import load_audio +import pyworld +import numpy as np, logging + +logging.getLogger("numba").setLevel(logging.WARNING) + +n_part = int(sys.argv[1]) +i_part = int(sys.argv[2]) +i_gpu = sys.argv[3] +os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) +exp_dir = sys.argv[4] +is_half = sys.argv[5] +f = open("%s/extract_f0_feature.log" % exp_dir, "a+") + + +def printt(strr): + print(strr) + f.write("%s\n" % strr) + f.flush() + + + +class FeatureInput(object): + def __init__(self, samplerate=16000, hop_size=160): + self.fs = samplerate + self.hop = hop_size + + self.f0_bin = 256 + self.f0_max = 1100.0 + self.f0_min = 50.0 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + + def compute_f0(self, path, f0_method): + x = load_audio(path, self.fs) + p_len = x.shape[0] // self.hop + if(f0_method=="rmvpe"): + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE( + "rmvpe.pt", is_half=True, device="cuda" + ) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + return f0 + + def coarse_f0(self, f0): + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( + self.f0_bin - 2 + ) / (self.f0_mel_max - self.f0_mel_min) + 1 + + # use 0 or 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 + f0_coarse = np.rint(f0_mel).astype(int) + assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( + f0_coarse.max(), + f0_coarse.min(), + ) + return f0_coarse + + def go(self, paths, f0_method): + if len(paths) == 0: + printt("no-f0-todo") + else: + printt("todo-f0-%s" % len(paths)) + n = max(len(paths) // 5, 1) # 每个进程最多打印5条 + for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): + try: + if idx % n == 0: + printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) + if ( + os.path.exists(opt_path1 + ".npy") == True + and os.path.exists(opt_path2 + ".npy") == True + ): + continue + featur_pit = self.compute_f0(inp_path, f0_method) + np.save( + opt_path2, + featur_pit, + allow_pickle=False, + ) # nsf + coarse_pit = self.coarse_f0(featur_pit) + np.save( + opt_path1, + coarse_pit, + allow_pickle=False, + ) # ori + except: + printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) + + +if __name__ == "__main__": + # exp_dir=r"E:\codes\py39\dataset\mi-test" + # n_p=16 + # f = open("%s/log_extract_f0.log"%exp_dir, "w") + printt(sys.argv) + featureInput = FeatureInput() + paths = [] + inp_root = "%s/1_16k_wavs" % (exp_dir) + opt_root1 = "%s/2a_f0" % (exp_dir) + opt_root2 = "%s/2b-f0nsf" % (exp_dir) + + os.makedirs(opt_root1, exist_ok=True) + os.makedirs(opt_root2, exist_ok=True) + for name in sorted(list(os.listdir(inp_root))): + inp_path = "%s/%s" % (inp_root, name) + if "spec" in inp_path: + continue + opt_path1 = "%s/%s" % (opt_root1, name) + opt_path2 = "%s/%s" % (opt_root2, name) + paths.append([inp_path, opt_path1, opt_path2]) + try: + featureInput.go(paths[i_part::n_part],"rmvpe") + except: + printt("f0_all_fail-%s" % (traceback.format_exc())) + # ps = [] + # for i in range(n_p): + # p = Process( + # target=featureInput.go, + # args=( + # paths[i::n_p], + # f0method, + # ), + # ) + # ps.append(p) + # p.start() + # for i in range(n_p): + # ps[i].join() diff --git a/infer-web.py b/infer-web.py index efd3c3a..13b2964 100644 --- a/infer-web.py +++ b/infer-web.py @@ -44,7 +44,7 @@ now_dir = os.getcwd() tmp = os.path.join(now_dir, "TEMP") shutil.rmtree(tmp, ignore_errors=True) shutil.rmtree( - "%s/runtime/Lib/site-packages/lib.infer_pack" % (now_dir), ignore_errors=True + "%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True ) shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True) os.makedirs(tmp, exist_ok=True) @@ -542,7 +542,7 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): f.close() cmd = ( config.python_cmd - + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s " + + ' trainset_preprocess_pipeline_print.py "%s" %s %s "%s/logs/%s" ' % (trainset_dir, sr, n_p, now_dir, exp_dir) + str(config.noparallel) ) @@ -570,41 +570,83 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) -def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19): +def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19,gpus_rmvpe): gpus = gpus.split("-") os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") f.close() if if_f0: - cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s" % ( - now_dir, - exp_dir, - n_p, - f0method, - ) - print(cmd) - p = Popen(cmd, shell=True, cwd=now_dir) # , stdin=PIPE, stdout=PIPE,stderr=PIPE - ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 - done = [False] - threading.Thread( - target=if_done, - args=( - done, - p, - ), - ).start() - while 1: - with open( - "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" - ) as f: - yield (f.read()) - sleep(1) - if done[0]: - break - with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: - log = f.read() - print(log) - yield log + if(f0method!="rmvpe_gpu"): + cmd = config.python_cmd + ' extract_f0_print.py "%s/logs/%s" %s %s' % ( + now_dir, + exp_dir, + n_p, + f0method, + ) + print(cmd) + p = Popen(cmd, shell=True, cwd=now_dir) # , stdin=PIPE, stdout=PIPE,stderr=PIPE + ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 + done = [False] + threading.Thread( + target=if_done, + args=( + done, + p, + ), + ).start() + while 1: + with open( + "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" + ) as f: + yield (f.read()) + sleep(1) + if done[0]: + break + with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: + log = f.read() + print(log) + yield log + else: + gpus_rmvpe = gpus_rmvpe.split("-") + leng = len(gpus_rmvpe) + ps = [] + for idx, n_g in enumerate(gpus_rmvpe): + cmd = ( + config.python_cmd + + ' extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' + % ( + leng, + idx, + n_g, + now_dir, + exp_dir, + config.is_half + ) + ) + print(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 + done = [False] + threading.Thread( + target=if_done_multi, + args=( + done, + ps, + ), + ).start() + while 1: + with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: + yield (f.read()) + sleep(1) + if done[0]: + break + with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: + log = f.read() + print(log) + yield log ####对不同part分别开多进程 """ n_part=int(sys.argv[1]) @@ -618,7 +660,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19): for idx, n_g in enumerate(gpus): cmd = ( config.python_cmd - + " extract_feature_print.py %s %s %s %s %s/logs/%s %s" + + ' extract_feature_print.py %s %s %s %s "%s/logs/%s" %s' % ( config.device, leng, @@ -854,7 +896,7 @@ def click_train( if gpus16: cmd = ( config.python_cmd - + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' % ( exp_dir1, sr2, @@ -874,7 +916,7 @@ def click_train( else: cmd = ( config.python_cmd - + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' % ( exp_dir1, sr2, @@ -995,7 +1037,7 @@ def train1key( gpus16, if_cache_gpu17, if_save_every_weights18, - version19, + version19,gpus_rmvpe ): infos = [] @@ -1018,7 +1060,7 @@ def train1key( open(preprocess_log_path, "w").close() cmd = ( config.python_cmd - + " trainset_preprocess_pipeline_print.py %s %s %s %s " + + ' trainset_preprocess_pipeline_print.py "%s" %s %s "%s" ' % (trainset_dir4, sr_dict[sr2], np7, model_log_dir) + str(config.noparallel) ) @@ -1032,14 +1074,38 @@ def train1key( open(extract_f0_feature_log_path, "w") if if_f0_3: yield get_info_str("step2a:正在提取音高") - cmd = config.python_cmd + " extract_f0_print.py %s %s %s" % ( - model_log_dir, - np7, - f0method8, - ) - yield get_info_str(cmd) - p = Popen(cmd, shell=True, cwd=now_dir) - p.wait() + if(f0method8!="rmvpe_gpu"): + cmd = config.python_cmd + ' extract_f0_print.py "%s" %s %s' % ( + model_log_dir, + np7, + f0method8, + ) + yield get_info_str(cmd) + p = Popen(cmd, shell=True, cwd=now_dir) + p.wait() + else: + gpus_rmvpe = gpus_rmvpe.split("-") + leng = len(gpus_rmvpe) + ps = [] + for idx, n_g in enumerate(gpus_rmvpe): + cmd = ( + config.python_cmd + + ' extract_f0_rmvpe.py %s %s %s "%s" %s ' + % ( + leng, + idx, + n_g, + model_log_dir, + config.is_half + ) + ) + yield get_info_str(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + for p in ps: + p.wait() with open(extract_f0_feature_log_path, "r") as f: print(f.read()) else: @@ -1050,7 +1116,7 @@ def train1key( leng = len(gpus) ps = [] for idx, n_g in enumerate(gpus): - cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % ( + cmd = config.python_cmd + ' extract_feature_print.py %s %s %s %s "%s" %s' % ( config.device, leng, idx, @@ -1131,7 +1197,7 @@ def train1key( if gpus16: cmd = ( config.python_cmd - + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' % ( exp_dir1, sr2, @@ -1151,7 +1217,7 @@ def train1key( else: cmd = ( config.python_cmd - + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' % ( exp_dir1, sr2, @@ -1252,6 +1318,10 @@ def change_info_(ckpt_path): traceback.print_exc() return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} +def change_f0_method(f0method8): + if(f0method8=="rmvpe_gpu"):visible=True + else:visible=False + return {"visible": visible, "__type__": "update"} def export_onnx(ModelPath, ExportedPath): global cpt @@ -1340,7 +1410,7 @@ with gr.Blocks(title="RVC WebUI") as app: ) f0method0 = gr.Radio( label=i18n( - "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU" + "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" ), choices=["pm", "harvest", "crepe", "rmvpe"], value="pm", @@ -1442,7 +1512,7 @@ with gr.Blocks(title="RVC WebUI") as app: opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt") f0method1 = gr.Radio( label=i18n( - "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU" + "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" ), choices=["pm", "harvest", "crepe", "rmvpe"], value="pm", @@ -1630,7 +1700,7 @@ with gr.Blocks(title="RVC WebUI") as app: version19 = gr.Radio( label=i18n("版本"), choices=["v1", "v2"], - value="v1", + value="v2", interactive=True, visible=True, ) @@ -1680,15 +1750,26 @@ with gr.Blocks(title="RVC WebUI") as app: label=i18n( "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢" ), - choices=["pm", "harvest", "dio"], - value="harvest", + choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"], + value="rmvpe_gpu", interactive=True, ) + gpus_rmvpe = gr.Textbox( + label=i18n("rmvpe卡号配置:以-分隔输入使用的不同进程卡号,例如0-0-1使用在卡0上跑2个进程并在卡1上跑1个进程"), + value="%s-%s"%(gpus,gpus), + interactive=True, + visible=True + ) but2 = gr.Button(i18n("特征提取"), variant="primary") info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) + f0method8.change( + fn=change_f0_method, + inputs=[f0method8], + outputs=[gpus_rmvpe], + ) but2.click( extract_f0_feature, - [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19], + [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19,gpus_rmvpe], [info2], ) with gr.Group(): @@ -1741,12 +1822,12 @@ with gr.Blocks(title="RVC WebUI") as app: with gr.Row(): pretrained_G14 = gr.Textbox( label=i18n("加载预训练底模G路径"), - value="pretrained/f0G40k.pth", + value="pretrained_v2/f0G40k.pth", interactive=True, ) pretrained_D15 = gr.Textbox( label=i18n("加载预训练底模D路径"), - value="pretrained/f0D40k.pth", + value="pretrained_v2/f0D40k.pth", interactive=True, ) sr2.change( @@ -1813,7 +1894,7 @@ with gr.Blocks(title="RVC WebUI") as app: gpus16, if_cache_gpu17, if_save_every_weights18, - version19, + version19,gpus_rmvpe ], info3, ) diff --git a/train_nsf_sim_cache_sid_load_pretrain.py b/train_nsf_sim_cache_sid_load_pretrain.py index ffd13e7..dafc44e 100644 --- a/train_nsf_sim_cache_sid_load_pretrain.py +++ b/train_nsf_sim_cache_sid_load_pretrain.py @@ -3,7 +3,6 @@ now_dir = os.getcwd() sys.path.append(os.path.join(now_dir)) sys.path.append(os.path.join(now_dir, "train")) -from lib.train import utils from lib.train import utils import datetime diff --git a/trainset_preprocess_pipeline_print.py b/trainset_preprocess_pipeline_print.py index 784c0a9..62671ba 100644 --- a/trainset_preprocess_pipeline_print.py +++ b/trainset_preprocess_pipeline_print.py @@ -3,7 +3,7 @@ from scipy import signal now_dir = os.getcwd() sys.path.append(now_dir) - +print(sys.argv) inp_root = sys.argv[1] sr = int(sys.argv[2]) n_p = int(sys.argv[3])