From 6bc929abeab9402c1cda3caa6afafe8187f4eb10 Mon Sep 17 00:00:00 2001 From: yxlllc Date: Thu, 28 Dec 2023 18:02:41 +0800 Subject: [PATCH] optimize stream logic --- gui_v1.py | 218 +++++++++++++++++++++++++----------------------------- 1 file changed, 100 insertions(+), 118 deletions(-) diff --git a/gui_v1.py b/gui_v1.py index 86b52d3..72b13dc 100644 --- a/gui_v1.py +++ b/gui_v1.py @@ -12,8 +12,7 @@ now_dir = os.getcwd() sys.path.append(now_dir) import multiprocessing -stream_latency = -1 - +flag_vc = False def printt(strr, *args): if len(args) == 0: @@ -113,32 +112,36 @@ if __name__ == "__main__": self.pth_path: str = "" self.index_path: str = "" self.pitch: int = 0 - self.samplerate: int = 40000 - self.block_time: float = 1.0 # s - self.buffer_num: int = 1 + self.sr_type: str = "sr_model" + self.block_time: float = 0.25 # s self.threhold: int = -60 self.crossfade_time: float = 0.05 self.extra_time: float = 2.5 - self.I_noise_reduce = False - self.O_noise_reduce = False - self.rms_mix_rate = 0.0 - self.index_rate = 0.3 - self.n_cpu = min(n_cpu, 6) - self.f0method = "harvest" - self.sg_input_device = "" - self.sg_output_device = "" + self.I_noise_reduce: bool = False + self.O_noise_reduce: bool = False + self.use_pv: bool = False + self.rms_mix_rate: float = 0.0 + self.index_rate: float = 0.0 + self.n_cpu: int = min(n_cpu, 4) + self.f0method: str = "fcpe" + self.sg_input_device: str = "" + self.sg_output_device: str = "" class GUI: def __init__(self) -> None: self.gui_config = GUIConfig() self.config = Config() - self.flag_vc = False self.function = "vc" self.delay_time = 0 + self.input_devices = None + self.output_devices = None + self.input_devices_indices = None + self.output_devices_indices = None + self.stream = None + self.update_devices() self.launcher() def load(self): - input_devices, output_devices, _, _ = self.get_devices() try: with open("configs/config.json", "r") as j: data = json.load(j) @@ -149,25 +152,26 @@ if __name__ == "__main__": data["crepe"] = data["f0method"] == "crepe" data["rmvpe"] = data["f0method"] == "rmvpe" data["fcpe"] = data["f0method"] == "fcpe" - if data["sg_input_device"] not in input_devices: - data["sg_input_device"] = input_devices[sd.default.device[0]] - if data["sg_output_device"] not in output_devices: - data["sg_output_device"] = output_devices[sd.default.device[1]] + if data["sg_input_device"] not in self.input_devices: + data["sg_input_device"] = self.input_devices[self.input_devices_indices.index(sd.default.device[0])] + if data["sg_output_device"] not in self.output_devices: + data["sg_output_device"] = self.output_devices[self.output_devices_indices.index(sd.default.device[1])] except: with open("configs/config.json", "w") as j: data = { - "pth_path": " ", - "index_path": " ", - "sg_input_device": input_devices[sd.default.device[0]], - "sg_output_device": output_devices[sd.default.device[1]], + "pth_path": "", + "index_path": "", + "sg_input_device": self.input_devices[self.input_devices_indices.index(sd.default.device[0])], + "sg_output_device": self.output_devices[self.output_devices_indices.index(sd.default.device[1])], "sr_type": "sr_model", - "threhold": "-60", - "pitch": "0", - "index_rate": "0", - "rms_mix_rate": "0", - "block_time": "0.25", - "crossfade_length": "0.05", - "extra_time": "2.5", + "threhold": -60, + "pitch": 0, + "index_rate": 0, + "rms_mix_rate": 0, + "block_time": 0.25, + "crossfade_length": 0.05, + "extra_time": 2.5, + "n_cpu": 4, "f0method": "rmvpe", "use_jit": False, "use_pv": False, @@ -185,7 +189,6 @@ if __name__ == "__main__": data = self.load() self.config.use_jit = False # data.get("use_jit", self.config.use_jit) sg.theme("LightBlue3") - input_devices, output_devices, _, _ = self.get_devices() layout = [ [ sg.Frame( @@ -224,7 +227,7 @@ if __name__ == "__main__": [ sg.Text(i18n("输入设备")), sg.Combo( - input_devices, + self.input_devices, key="sg_input_device", default_value=data.get("sg_input_device", ""), ), @@ -232,7 +235,7 @@ if __name__ == "__main__": [ sg.Text(i18n("输出设备")), sg.Combo( - output_devices, + self.output_devices, key="sg_output_device", default_value=data.get("sg_output_device", ""), ), @@ -463,32 +466,27 @@ if __name__ == "__main__": self.event_handler() def event_handler(self): + global flag_vc while True: event, values = self.window.read() if event == sg.WINDOW_CLOSED: - self.flag_vc = False + self.stop_stream() exit() if event == "reload_devices": - prev_input = self.window["sg_input_device"].get() - prev_output = self.window["sg_output_device"].get() - input_devices, output_devices, _, _ = self.get_devices(update=True) - if prev_input not in input_devices: - self.gui_config.sg_input_device = input_devices[0] - else: - self.gui_config.sg_input_device = prev_input - self.window["sg_input_device"].Update(values=input_devices) + self.update_devices() + if self.gui_config.sg_input_device not in self.input_devices: + self.gui_config.sg_input_device = self.input_devices[0] + self.window["sg_input_device"].Update(values=self.input_devices) self.window["sg_input_device"].Update( value=self.gui_config.sg_input_device ) - if prev_output not in output_devices: - self.gui_config.sg_output_device = output_devices[0] - else: - self.gui_config.sg_output_device = prev_output - self.window["sg_output_device"].Update(values=output_devices) + if self.gui_config.sg_output_device not in self.output_devices: + self.gui_config.sg_output_device = self.output_devices[0] + self.window["sg_output_device"].Update(values=self.output_devices) self.window["sg_output_device"].Update( value=self.gui_config.sg_output_device ) - if event == "start_vc" and self.flag_vc == False: + if event == "start_vc" and not flag_vc: if self.set_values(values) == True: printt("cuda_is_available: %s", torch.cuda.is_available()) self.start_vc() @@ -527,22 +525,17 @@ if __name__ == "__main__": } with open("configs/config.json", "w") as j: json.dump(settings, j) - global stream_latency - while stream_latency < 0: - time.sleep(0.01) - self.delay_time = ( - stream_latency - + values["block_time"] - + values["crossfade_length"] - + 0.01 - ) + if self.stream is not None: + self.delay_time = ( + self.stream.latency[-1] + + values["block_time"] + + values["crossfade_length"] + + 0.01 + ) if values["I_noise_reduce"]: self.delay_time += min(values["crossfade_length"], 0.04) self.window["sr_stream"].update(self.gui_config.samplerate) self.window["delay_time"].update(int(self.delay_time * 1000)) - if event == "stop_vc" and self.flag_vc == True: - self.flag_vc = False - stream_latency = -1 # Parameter hot update if event == "threhold": self.gui_config.threhold = values["threhold"] @@ -560,7 +553,7 @@ if __name__ == "__main__": self.gui_config.f0method = event elif event == "I_noise_reduce": self.gui_config.I_noise_reduce = values["I_noise_reduce"] - if stream_latency > 0: + if self.stream is not None: self.delay_time += ( 1 if values["I_noise_reduce"] else -1 ) * min(values["crossfade_length"], 0.04) @@ -571,11 +564,10 @@ if __name__ == "__main__": self.gui_config.use_pv = values["use_pv"] elif event in ["vc", "im"]: self.function = event - elif event != "start_vc" and self.flag_vc == True: + elif event == "stop_vc" or event != "start_vc": # Other parameters do not support hot update - self.flag_vc = False - stream_latency = -1 - + self.stop_stream() + def set_values(self, values): if len(values["pth_path"].strip()) == 0: sg.popup(i18n("请选择pth文件")) @@ -593,6 +585,8 @@ if __name__ == "__main__": self.set_devices(values["sg_input_device"], values["sg_output_device"]) self.config.use_jit = False # values["use_jit"] # self.device_latency = values["device_latency"] + self.gui_config.sg_input_device = values["sg_input_device"] + self.gui_config.sg_output_device = values["sg_output_device"] self.gui_config.pth_path = values["pth_path"] self.gui_config.index_path = values["index_path"] self.gui_config.sr_type = ["sr_model", "sr_device"][ @@ -625,7 +619,6 @@ if __name__ == "__main__": def start_vc(self): torch.cuda.empty_cache() - self.flag_vc = True self.rvc = rvc_for_realtime.RVC( self.gui_config.pitch, self.gui_config.pth_path, @@ -732,34 +725,37 @@ if __name__ == "__main__": self.tg = TorchGate( sr=self.gui_config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9 ).to(self.config.device) - thread_vc = threading.Thread(target=self.soundinput) - thread_vc.start() - - def soundinput(self): - """ - 接受音频输入 - """ - channels = 1 if sys.platform == "darwin" else 2 - with sd.Stream( - channels=channels, - callback=self.audio_callback, - blocksize=self.block_frame, - samplerate=self.gui_config.samplerate, - dtype="float32", - ) as stream: - global stream_latency - stream_latency = stream.latency[-1] - while self.flag_vc: - time.sleep(self.gui_config.block_time) - printt("Audio block passed.") - printt("ENDing VC") + self.start_stream() + + def start_stream(self): + global flag_vc + if not flag_vc: + flag_vc = True + channels = 1 if sys.platform == "darwin" else 2 + self.stream = sd.Stream( + channels=channels, + callback=self.audio_callback, + blocksize=self.block_frame, + samplerate=self.gui_config.samplerate, + dtype="float32") + self.stream.start() + def stop_stream(self): + global flag_vc + if flag_vc: + flag_vc = False + if self.stream is not None: + self.stream.stop() + self.stream.close() + self.stream = None + def audio_callback( self, indata: np.ndarray, outdata: np.ndarray, frames, times, status ): """ 音频处理 """ + global flag_vc start_time = time.perf_counter() indata = librosa.to_mono(indata.T) if self.gui_config.threhold > -60: @@ -908,66 +904,52 @@ if __name__ == "__main__": infer_wav[: self.block_frame].repeat(2, 1).t().cpu().numpy() ) total_time = time.perf_counter() - start_time - self.window["infer_time"].update(int(total_time * 1000)) + if flag_vc: + self.window["infer_time"].update(int(total_time * 1000)) printt("Infer time: %.2f", total_time) - def get_devices(self, update: bool = True): + def update_devices(self): """获取设备列表""" - if update: - sd._terminate() - sd._initialize() + sd._terminate() + sd._initialize() devices = sd.query_devices() hostapis = sd.query_hostapis() for hostapi in hostapis: for device_idx in hostapi["devices"]: devices[device_idx]["hostapi_name"] = hostapi["name"] - input_devices = [ + self.input_devices = [ f"{d['name']} ({d['hostapi_name']})" for d in devices if d["max_input_channels"] > 0 ] - output_devices = [ + self.output_devices = [ f"{d['name']} ({d['hostapi_name']})" for d in devices if d["max_output_channels"] > 0 ] - input_devices_indices = [ + self.input_devices_indices = [ d["index"] if "index" in d else d["name"] for d in devices if d["max_input_channels"] > 0 ] - output_devices_indices = [ + self.output_devices_indices = [ d["index"] if "index" in d else d["name"] for d in devices if d["max_output_channels"] > 0 ] - return ( - input_devices, - output_devices, - input_devices_indices, - output_devices_indices, - ) - + def set_devices(self, input_device, output_device): """设置输出设备""" - ( - input_devices, - output_devices, - input_device_indices, - output_device_indices, - ) = self.get_devices() - sd.default.device[0] = input_device_indices[ - input_devices.index(input_device) + sd.default.device[0] = self.input_devices_indices[ + self.input_devices.index(input_device) ] - sd.default.device[1] = output_device_indices[ - output_devices.index(output_device) + sd.default.device[1] = self.output_devices_indices[ + self.output_devices.index(output_device) ] printt("Input device: %s:%s", str(sd.default.device[0]), input_device) printt("Output device: %s:%s", str(sd.default.device[1]), output_device) - + def get_device_samplerate(self): - return int( - sd.query_devices(device=sd.default.device[0])["default_samplerate"] - ) - + return int(sd.query_devices(device=sd.default.device[0])['default_samplerate']) + gui = GUI()