From 0f9d2e6cac35dc5f388c090a0ac4f65f264feec3 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Tue, 11 Jul 2023 16:27:18 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=9E=E6=97=B6GUI=E6=94=AF=E6=8C=81rmvpe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 实时GUI支持rmvpe --- gui_v1.py | 18 +++++++++++------- rvc_for_realtime.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/gui_v1.py b/gui_v1.py index 9dff0e5..916e56e 100644 --- a/gui_v1.py +++ b/gui_v1.py @@ -81,6 +81,7 @@ if __name__ == '__main__': data["pm"]=data["f0method"]=="pm" data["harvest"]=data["f0method"]=="harvest" data["crepe"]=data["f0method"]=="crepe" + data["rmvpe"]=data["f0method"]=="rmvpe" except: with open("values1.json", "w") as j: data = { @@ -94,7 +95,7 @@ if __name__ == '__main__': "block_time": "1", "crossfade_length": "0.04", "extra_time": "1", - "f0method": "harvest", + "f0method": "rmvpe", } return data @@ -193,6 +194,7 @@ if __name__ == '__main__': sg.Radio("pm","f0method",key="pm",default=data.get("pm","")==True), sg.Radio("harvest","f0method",key="harvest",default=data.get("harvest","")==True), sg.Radio("crepe","f0method",key="crepe",default=data.get("crepe","")==True), + sg.Radio("rmvpe","f0method",key="rmvpe",default=data.get("rmvpe","")==True), ], ], title=i18n("常规设置"), @@ -279,7 +281,7 @@ if __name__ == '__main__': "crossfade_length": values["crossfade_length"], "extra_time": values["extra_time"], "n_cpu": values["n_cpu"], - "f0method": ["pm","harvest","crepe"][[values["pm"],values["harvest"],values["crepe"]].index(True)], + "f0method": ["pm","harvest","crepe","rmvpe"][[values["pm"],values["harvest"],values["crepe"],values["rmvpe"]].index(True)], } with open("values1.json", "w") as j: json.dump(settings, j) @@ -312,7 +314,7 @@ if __name__ == '__main__': self.config.O_noise_reduce = values["O_noise_reduce"] self.config.index_rate = values["index_rate"] self.config.n_cpu = values["n_cpu"] - self.config.f0method = ["pm","harvest","crepe"][[values["pm"],values["harvest"],values["crepe"]].index(True)] + self.config.f0method = ["pm","harvest","crepe","rmvpe"][[values["pm"],values["harvest"],values["crepe"],values["rmvpe"]].index(True)] return True def start_vc(self): @@ -346,7 +348,7 @@ if __name__ == '__main__': self.fade_out_window: torch.Tensor = 1 - self.fade_in_window self.resampler = tat.Resample( orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32 - ) + ).to(device) thread_vc = threading.Thread(target=self.soundinput) thread_vc.start() @@ -389,13 +391,15 @@ if __name__ == '__main__': indata[i * hop_length : (i + 1) * hop_length] = 0 self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata) # infer - inp=torch.from_numpy(self.input_wav) + inp=torch.from_numpy(self.input_wav).to(device) + ##0 res1=self.resampler(inp) + ###55% rate1=self.block_frame/(self.extra_frame+ self.crossfade_frame+ self.sola_search_frame+ self.block_frame) rate2=(self.crossfade_frame + self.sola_search_frame + self.block_frame)/(self.extra_frame+ self.crossfade_frame+ self.sola_search_frame+ self.block_frame) - res2=self.rvc.infer(res1,res1[-self.block_frame:].numpy(),rate1,rate2,self.pitch,self.pitchf,self.config.f0method) + res2=self.rvc.infer(res1,res1[-self.block_frame:].cpu().numpy(),rate1,rate2,self.pitch,self.pitchf,self.config.f0method) self.output_wav_cache[-res2.shape[0]:]=res2 - infer_wav = self.output_wav_cache[-self.crossfade_frame - self.sola_search_frame - self.block_frame :].to(device) + infer_wav = self.output_wav_cache[-self.crossfade_frame - self.sola_search_frame - self.block_frame :] # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC cor_nom = F.conv1d( infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame], diff --git a/rvc_for_realtime.py b/rvc_for_realtime.py index 7a56d1a..6eb2824 100644 --- a/rvc_for_realtime.py +++ b/rvc_for_realtime.py @@ -81,6 +81,7 @@ class RVC: self.net_g = self.net_g.half() else: self.net_g = self.net_g.float() + self.is_half=config.is_half except: print(traceback.format_exc()) @@ -102,6 +103,7 @@ class RVC: def get_f0(self, x, f0_up_key, n_cpu, method="harvest"): n_cpu = int(n_cpu) if (method == "crepe"): return self.get_f0_crepe(x, f0_up_key) + if (method == "rmvpe"): return self.get_f0_rmvpe(x, f0_up_key) if (method == "pm"): p_len = x.shape[0] // 160 f0 = ( @@ -181,6 +183,16 @@ class RVC: f0 *= pow(2, f0_up_key / 12) return self.get_f0_post(f0) + def get_f0_rmvpe(self, x, f0_up_key): + if (hasattr(self, "model_rmvpe") == False): + from rmvpe import RMVPE + print("loading rmvpe model") + # self.model_rmvpe = RMVPE("rmvpe.pt", is_half=self.is_half, device=self.device) + self.model_rmvpe = RMVPE("aug2_58000_half.pt", is_half=self.is_half, device=self.device) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + f0 *= pow(2, f0_up_key / 12) + return self.get_f0_post(f0) + def infer(self, feats: torch.Tensor, indata: np.ndarray, rate1, rate2, cache_pitch, cache_pitchf, f0method) -> np.ndarray: feats = feats.view(1, -1) if config.is_half: