From c14721d9c317551f5ae318ee466a5ef5c7b2e2af Mon Sep 17 00:00:00 2001 From: Tps-F Date: Sat, 19 Aug 2023 10:57:09 +0000 Subject: [PATCH] Apply Code Formatter Change --- infer/modules/vc/modules.py | 103 +++++++++++++++++------------ infer/modules/vc/pipeline.py | 121 +++++++++++++++++------------------ infer/modules/vc/utils.py | 20 ++++-- 3 files changed, 137 insertions(+), 107 deletions(-) diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index 1d967f7..f1a96eb 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ -24,19 +24,19 @@ class VC: self.if_f0 = None self.version = None self.hubert_model = None - + self.config = config - + def get_vc(self, sid, to_return_protect0, to_return_protect1): person = f'{os.getenv("weight_root")}/{sid}' - print(f'loading {person}') - + print(f"loading {person}") + self.cpt = torch.load(person, map_location="cpu") self.tgt_sr = self.cpt["config"][-1] self.cpt["config"][-3] = self.cpt["weight"]["emb_g.weight"].shape[0] # n_spk self.if_f0 = self.cpt.get("f0", 1) self.version = self.cpt.get("version", "v1") - + to_return_protect0 = { "visible": self.if_f0 != 0, "value": to_return_protect0 if self.if_f0 != 0 else 0.5, @@ -47,16 +47,18 @@ class VC: "value": to_return_protect1 if self.if_f0 != 0 else 0.33, "__type__": "update", } - + synthesizer_class = { ("v1", 1): SynthesizerTrnMs256NSFsid, ("v1", 0): SynthesizerTrnMs256NSFsid_nono, ("v2", 1): SynthesizerTrnMs768NSFsid, - ("v2", 0): SynthesizerTrnMs768NSFsid_nono + ("v2", 0): SynthesizerTrnMs768NSFsid_nono, } - - self.net_g = synthesizer_class.get((self.version, self.if_f0), SynthesizerTrnMs256NSFsid)(*self.cpt["config"], is_half=self.config.is_half) - + + self.net_g = synthesizer_class.get( + (self.version, self.if_f0), SynthesizerTrnMs256NSFsid + )(*self.cpt["config"], is_half=self.config.is_half) + del self.net_g.enc_q self.net_g.load_state_dict(self.cpt["weight"], strict=False) @@ -65,23 +67,34 @@ class VC: self.net_g = self.net_g.half() else: self.net_g = self.net_g.float() - + self.pipeline = Pipeline(self.tgt_sr, self.config) n_spk = self.cpt["config"][-3] - index = { - "value": get_index_path_from_model(sid), - "__type__": "update" - } - + index = {"value": get_index_path_from_model(sid), "__type__": "update"} + return ( {"visible": True, "maximum": n_spk, "__type__": "update"}, to_return_protect0, to_return_protect1, index, - index + index, ) - - def vc_single(self, sid, input_audio_path, f0_up_key, f0_file, f0_method, file_index, file_index2, index_rate, filter_radius, resample_sr, rms_mix_rate, protect): + + def vc_single( + self, + sid, + input_audio_path, + f0_up_key, + f0_file, + f0_method, + file_index, + file_index2, + index_rate, + filter_radius, + resample_sr, + rms_mix_rate, + protect, + ): if input_audio_path is None: return "You need to upload an audio", None f0_up_key = int(f0_up_key) @@ -91,10 +104,10 @@ class VC: if audio_max > 1: audio /= audio_max times = [0, 0, 0] - + if self.hubert_model is None: self.hubert_model = load_hubert(self.config) - + file_index = ( ( file_index.strip(" ") @@ -107,7 +120,7 @@ class VC: if file_index != "" else file_index2 ) # 防止小白写错,自动帮他替换掉 - + audio_opt = Pipeline.pipeline( self.hubert_model, self.net_g, @@ -135,28 +148,32 @@ class VC: if os.path.exists(file_index) else "Index not used." ) - return f"Success.\n {index_info}\nTime:\n npy:{times[0]}s, f0:{times[1]}s, infer:{times[2]}s", (self.tgt_sr, audio_opt) + return ( + f"Success.\n {index_info}\nTime:\n npy:{times[0]}s, f0:{times[1]}s, infer:{times[2]}s", + (self.tgt_sr, audio_opt), + ) except: info = traceback.format_exc() print(info) return info, (None, None) - + def vc_multi( - self, - sid, - dir_path, - opt_root, - paths, - f0_up_key, - f0_method, - file_index, - file_index2, - index_rate, - filter_radius, - resample_sr, - rms_mix_rate, - protect, - format1): + self, + sid, + dir_path, + opt_root, + paths, + f0_up_key, + f0_method, + file_index, + file_index2, + index_rate, + filter_radius, + resample_sr, + rms_mix_rate, + protect, + format1, + ): try: dir_path = ( dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ") @@ -165,7 +182,9 @@ class VC: os.makedirs(opt_root, exist_ok=True) try: if dir_path != "": - paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)] + paths = [ + os.path.join(dir_path, name) for name in os.listdir(dir_path) + ] else: paths = [path.name for path in paths] except: @@ -193,7 +212,8 @@ class VC: tgt_sr, audio_opt = opt if format1 in ["wav", "flac"]: sf.write( - "%s/%s.%s" % (opt_root, os.path.basename(path), format1), + "%s/%s.%s" + % (opt_root, os.path.basename(path), format1), audio_opt, tgt_sr, ) @@ -216,4 +236,3 @@ class VC: yield "\n".join(infos) except: yield traceback.format_exc() - \ No newline at end of file diff --git a/infer/modules/vc/pipeline.py b/infer/modules/vc/pipeline.py index 54bc41d..3ac47cd 100644 --- a/infer/modules/vc/pipeline.py +++ b/infer/modules/vc/pipeline.py @@ -11,7 +11,7 @@ from functools import lru_cache now_dir = os.getcwd() sys.path.append(now_dir) - + bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000) input_audio_path2wav = {} @@ -71,18 +71,18 @@ class Pipeline(object): self.t_center = self.sr * self.x_center # 查询切点位置 self.t_max = self.sr * self.x_max # 免查询时长阈值 self.device = config.device - + self.model_rmvpe = None def get_f0( - self, - input_audio_path, - x, - p_len, - f0_up_key, - f0_method, - filter_radius, - inp_f0=None, + self, + input_audio_path, + x, + p_len, + f0_up_key, + f0_method, + filter_radius, + inp_f0=None, ): global input_audio_path2wav time_step = self.window / self.sr * 1000 @@ -141,12 +141,12 @@ class Pipeline(object): "rmvpe.pt", is_half=self.is_half, device=self.device ) f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) - + if "privateuseone" in str(self.device): # clean ortruntime memory del self.model_rmvpe.model del self.model_rmvpe print("cleaning ortruntime memory") - + f0 *= pow(2, f0_up_key / 12) # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) tf0 = self.sr // self.window # 每秒f0点数 @@ -157,8 +157,8 @@ class Pipeline(object): replace_f0 = np.interp( list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] ) - shape = f0[self.x_pad * tf0: self.x_pad * tf0 + len(replace_f0)].shape[0] - f0[self.x_pad * tf0: self.x_pad * tf0 + len(replace_f0)] = replace_f0[ + shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0] + f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[ :shape ] # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) @@ -173,19 +173,19 @@ class Pipeline(object): return f0_coarse, f0bak # 1-0 def vc( - self, - model, - net_g, - sid, - audio0, - pitch, - pitchf, - times, - index, - big_npy, - index_rate, - version, - protect, + self, + model, + net_g, + sid, + audio0, + pitch, + pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, ): # ,file_index,file_big_npy feats = torch.from_numpy(audio0) if self.is_half: @@ -275,26 +275,26 @@ class Pipeline(object): return audio1 def pipeline( - self, - model, - net_g, - sid, - audio, - input_audio_path, - times, - f0_up_key, - f0_method, - file_index, - # file_big_npy, - index_rate, - if_f0, - filter_radius, - tgt_sr, - resample_sr, - rms_mix_rate, - version, - protect, - f0_file=None, + self, + model, + net_g, + sid, + audio, + input_audio_path, + times, + f0_up_key, + f0_method, + file_index, + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=None, ): print(file_index) if ( @@ -319,14 +319,14 @@ class Pipeline(object): if audio_pad.shape[0] > self.t_max: audio_sum = np.zeros_like(audio) for i in range(self.window): - audio_sum += audio_pad[i: i - self.window] + audio_sum += audio_pad[i : i - self.window] for t in range(self.t_center, audio.shape[0], self.t_center): opt_ts.append( t - self.t_query + np.where( - np.abs(audio_sum[t - self.t_query: t + self.t_query]) - == np.abs(audio_sum[t - self.t_query: t + self.t_query]).min() + np.abs(audio_sum[t - self.t_query : t + self.t_query]) + == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min() )[0][0] ) s = 0 @@ -374,16 +374,16 @@ class Pipeline(object): model, net_g, sid, - audio_pad[s: t + self.t_pad2 + self.window], - pitch[:, s // self.window: (t + self.t_pad2) // self.window], - pitchf[:, s // self.window: (t + self.t_pad2) // self.window], + audio_pad[s : t + self.t_pad2 + self.window], + pitch[:, s // self.window : (t + self.t_pad2) // self.window], + pitchf[:, s // self.window : (t + self.t_pad2) // self.window], times, index, big_npy, index_rate, version, protect, - )[self.t_pad_tgt: -self.t_pad_tgt] + )[self.t_pad_tgt : -self.t_pad_tgt] ) else: audio_opt.append( @@ -391,7 +391,7 @@ class Pipeline(object): model, net_g, sid, - audio_pad[s: t + self.t_pad2 + self.window], + audio_pad[s : t + self.t_pad2 + self.window], None, None, times, @@ -400,7 +400,7 @@ class Pipeline(object): index_rate, version, protect, - )[self.t_pad_tgt: -self.t_pad_tgt] + )[self.t_pad_tgt : -self.t_pad_tgt] ) s = t if if_f0 == 1: @@ -410,15 +410,15 @@ class Pipeline(object): net_g, sid, audio_pad[t:], - pitch[:, t // self.window:] if t is not None else pitch, - pitchf[:, t // self.window:] if t is not None else pitchf, + pitch[:, t // self.window :] if t is not None else pitch, + pitchf[:, t // self.window :] if t is not None else pitchf, times, index, big_npy, index_rate, version, protect, - )[self.t_pad_tgt: -self.t_pad_tgt] + )[self.t_pad_tgt : -self.t_pad_tgt] ) else: audio_opt.append( @@ -435,7 +435,7 @@ class Pipeline(object): index_rate, version, protect, - )[self.t_pad_tgt: -self.t_pad_tgt] + )[self.t_pad_tgt : -self.t_pad_tgt] ) audio_opt = np.concatenate(audio_opt) if rms_mix_rate != 1: @@ -453,4 +453,3 @@ class Pipeline(object): if torch.cuda.is_available(): torch.cuda.empty_cache() return audio_opt - \ No newline at end of file diff --git a/infer/modules/vc/utils.py b/infer/modules/vc/utils.py index 9ba2ea5..933775f 100644 --- a/infer/modules/vc/utils.py +++ b/infer/modules/vc/utils.py @@ -6,7 +6,19 @@ from fairseq import checkpoint_utils def get_index_path_from_model(sid): - return next((f for f in [os.path.join(root, name) for root, dirs, files in os.walk(os.getenv("index_root"), topdown=False) for name in files if name.endswith(".index") and "trained" not in name] if sid.split(".")[0] in f), "") + return next( + ( + f + for f in [ + os.path.join(root, name) + for root, dirs, files in os.walk(os.getenv("index_root"), topdown=False) + for name in files + if name.endswith(".index") and "trained" not in name + ] + if sid.split(".")[0] in f + ), + "", + ) def load_hubert(config): @@ -21,8 +33,8 @@ def load_hubert(config): else: hubert_model = hubert_model.float() return hubert_model.eval() - - + + def load_audio(file, sr): try: # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26 @@ -39,4 +51,4 @@ def load_audio(file, sr): except Exception as e: raise RuntimeError(f"Failed to load audio: {e}") - return np.frombuffer(out, np.float32).flatten() \ No newline at end of file + return np.frombuffer(out, np.float32).flatten()