diff --git a/configs/config.py b/configs/config.py
index d259160..05819b9 100644
--- a/configs/config.py
+++ b/configs/config.py
@@ -19,6 +19,7 @@ version_config_list = [
     "v2/32k.json",
 ]
 
+
 def singleton_variable(func):
     def wrapper(*args, **kwargs):
         if not wrapper.instance:
@@ -101,7 +102,7 @@ class Config:
             return True
         except Exception:
             return False
-    
+
     def use_fp32_config(self):
         for config_file in version_config_list:
             self.json_config[config_file]["train"]["fp16_run"] = False
diff --git a/gui_v1.py b/gui_v1.py
index 93a60e4..9a2aebb 100644
--- a/gui_v1.py
+++ b/gui_v1.py
@@ -455,14 +455,17 @@ if __name__ == "__main__":
                 inp_q,
                 opt_q,
                 device,
-                self.rvc if hasattr(self, "rvc") else None
+                self.rvc if hasattr(self, "rvc") else None,
             )
             self.config.samplerate = self.rvc.tgt_sr
             self.config.crossfade_time = min(
                 self.config.crossfade_time, self.config.block_time
             )
             self.zc = self.rvc.tgt_sr // 100
-            self.block_frame = int(np.round(self.config.block_time * self.config.samplerate / self.zc)) * self.zc
+            self.block_frame = (
+                int(np.round(self.config.block_time * self.config.samplerate / self.zc))
+                * self.zc
+            )
             self.block_frame_16k = 160 * self.block_frame // self.zc
             self.crossfade_frame = int(
                 self.config.crossfade_time * self.config.samplerate
@@ -484,7 +487,9 @@ if __name__ == "__main__":
                 ),
                 dtype="float32",
             )
-            self.input_wav_res: torch.Tensor= torch.zeros(160 * len(self.input_wav) // self.zc)
+            self.input_wav_res: torch.Tensor = torch.zeros(
+                160 * len(self.input_wav) // self.zc
+            )
             self.output_wav_cache: torch.Tensor = torch.zeros(
                 int(
                     np.ceil(
@@ -577,12 +582,18 @@ if __name__ == "__main__":
                     if db_threhold[i]:
                         indata[i * hop_length : (i + 1) * hop_length] = 0
             self.input_wav[: -self.block_frame] = self.input_wav[self.block_frame :]
-            self.input_wav[-self.block_frame: ] = indata
+            self.input_wav[-self.block_frame :] = indata
 
             # infer
-            inp = torch.from_numpy(self.input_wav[-self.block_frame-2*self.zc :]).to(device)
-            self.input_wav_res[ : -self.block_frame_16k] = self.input_wav_res[self.block_frame_16k :].clone()
-            self.input_wav_res[-self.block_frame_16k-160 :] = self.resampler(inp)[160 :]
+            inp = torch.from_numpy(
+                self.input_wav[-self.block_frame - 2 * self.zc :]
+            ).to(device)
+            self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[
+                self.block_frame_16k :
+            ].clone()
+            self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(inp)[
+                160:
+            ]
             rate = (
                 self.crossfade_frame + self.sola_search_frame + self.block_frame
             ) / (
@@ -592,11 +603,11 @@ if __name__ == "__main__":
                 + self.block_frame
             )
             f0_extractor_frame = self.block_frame_16k + 800
-            if self.config.f0method == 'rmvpe':
+            if self.config.f0method == "rmvpe":
                 f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1)
             res2 = self.rvc.infer(
                 self.input_wav_res,
-                self.input_wav_res[-f0_extractor_frame :].cpu().numpy(),
+                self.input_wav_res[-f0_extractor_frame:].cpu().numpy(),
                 self.block_frame_16k,
                 rate,
                 self.pitch,
@@ -720,9 +731,7 @@ if __name__ == "__main__":
             sd.default.device[1] = output_device_indices[
                 output_devices.index(output_device)
             ]
-            logger.info(
-                "Input device: %s:%d", str(sd.default.device[0]), input_device
-            )
+            logger.info("Input device: %s:%d", str(sd.default.device[0]), input_device)
             logger.info(
                 "Output device: %s:%d", str(sd.default.device[1]), output_device
             )
diff --git a/infer-web.py b/infer-web.py
index f263384..010766b 100644
--- a/infer-web.py
+++ b/infer-web.py
@@ -208,9 +208,15 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
     f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
     f.close()
     per = 3.0 if config.is_half else 3.7
-    cmd = (
-        '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f'
-        % (config.python_cmd, trainset_dir, sr, n_p, now_dir, exp_dir, config.noparallel, per)
+    cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
+        config.python_cmd,
+        trainset_dir,
+        sr,
+        n_p,
+        now_dir,
+        exp_dir,
+        config.noparallel,
+        per,
     )
     logger.info(cmd)
     p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
@@ -272,14 +278,17 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
                 leng = len(gpus_rmvpe)
                 ps = []
                 for idx, n_g in enumerate(gpus_rmvpe):
-                    cmd = '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' % (
-                        config.python_cmd,
-                        leng,
-                        idx,
-                        n_g,
-                        now_dir,
-                        exp_dir,
-                        config.is_half,
+                    cmd = (
+                        '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
+                        % (
+                            config.python_cmd,
+                            leng,
+                            idx,
+                            n_g,
+                            now_dir,
+                            exp_dir,
+                            config.is_half,
+                        )
                     )
                     logger.info(cmd)
                     p = Popen(
@@ -333,15 +342,18 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
     leng = len(gpus)
     ps = []
     for idx, n_g in enumerate(gpus):
-        cmd = '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s' % (
-            config.python_cmd,
-            config.device,
-            leng,
-            idx,
-            n_g,
-            now_dir,
-            exp_dir,
-            version19,
+        cmd = (
+            '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s'
+            % (
+                config.python_cmd,
+                config.device,
+                leng,
+                idx,
+                n_g,
+                now_dir,
+                exp_dir,
+                version19,
+            )
         )
         logger.info(cmd)
         p = Popen(
@@ -379,12 +391,16 @@ def get_pretrained_models(path_str, f0_str, sr2):
     if not if_pretrained_generator_exist:
         logger.warn(
             "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model",
-            path_str, f0_str, sr2
+            path_str,
+            f0_str,
+            sr2,
         )
     if not if_pretrained_discriminator_exist:
         logger.warn(
             "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model",
-            path_str, f0_str, sr2
+            path_str,
+            f0_str,
+            sr2,
         )
     return (
         "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
@@ -421,9 +437,11 @@ def change_version19(sr2, if_f0_3, version19):
 def change_f0(if_f0_3, sr2, version19):  # f0method8,pretrained_G14,pretrained_D15
     path_str = "" if version19 == "v1" else "_v2"
     return (
-        {"visible": if_f0_3, "__type__": "update"}, *get_pretrained_models(path_str, "f0", sr2)
+        {"visible": if_f0_3, "__type__": "update"},
+        *get_pretrained_models(path_str, "f0", sr2),
     )
 
+
 # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
 def click_train(
     exp_dir1,
@@ -522,24 +540,33 @@ def click_train(
     config_save_path = os.path.join(exp_dir, "config.json")
     if not pathlib.Path(config_save_path).exists():
         with open(config_save_path, "w", encoding="utf-8") as f:
-            json.dump(config.json_config[config_path], f, ensure_ascii=False, indent=4, sort_keys=True)
+            json.dump(
+                config.json_config[config_path],
+                f,
+                ensure_ascii=False,
+                indent=4,
+                sort_keys=True,
+            )
             f.write("\n")
     if gpus16:
-        cmd = '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' % (
-            config.python_cmd,
-            exp_dir1,
-            sr2,
-            1 if if_f0_3 else 0,
-            batch_size12,
-            gpus16,
-            total_epoch11,
-            save_epoch10,
-            "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
-            "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
-            1 if if_save_latest13 == i18n("是") else 0,
-            1 if if_cache_gpu17 == i18n("是") else 0,
-            1 if if_save_every_weights18 == i18n("是") else 0,
-            version19,
+        cmd = (
+            '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
+            % (
+                config.python_cmd,
+                exp_dir1,
+                sr2,
+                1 if if_f0_3 else 0,
+                batch_size12,
+                gpus16,
+                total_epoch11,
+                save_epoch10,
+                "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+                "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+                1 if if_save_latest13 == i18n("是") else 0,
+                1 if if_cache_gpu17 == i18n("是") else 0,
+                1 if if_save_every_weights18 == i18n("是") else 0,
+                version19,
+            )
         )
     else:
         cmd = (
diff --git a/infer/lib/infer_pack/models.py b/infer/lib/infer_pack/models.py
index ae9b7b4..d02d8cf 100644
--- a/infer/lib/infer_pack/models.py
+++ b/infer/lib/infer_pack/models.py
@@ -617,7 +617,10 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
         logger.debug(
-            "gin_channels: " + str(gin_channels) + ", self.spk_embed_dim: " + str(self.spk_embed_dim)
+            "gin_channels: "
+            + str(gin_channels)
+            + ", self.spk_embed_dim: "
+            + str(self.spk_embed_dim)
         )
 
     def remove_weight_norm(self):
@@ -735,7 +738,10 @@ class SynthesizerTrnMs768NSFsid(nn.Module):
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
         logger.debug(
-            "gin_channels: " + str(gin_channels) + ", self.spk_embed_dim: " + str(self.spk_embed_dim)
+            "gin_channels: "
+            + str(gin_channels)
+            + ", self.spk_embed_dim: "
+            + str(self.spk_embed_dim)
         )
 
     def remove_weight_norm(self):
@@ -850,7 +856,10 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
         logger.debug(
-            "gin_channels: " + str(gin_channels) + ", self.spk_embed_dim: " + str(self.spk_embed_dim)
+            "gin_channels: "
+            + str(gin_channels)
+            + ", self.spk_embed_dim: "
+            + str(self.spk_embed_dim)
         )
 
     def remove_weight_norm(self):
@@ -958,7 +967,10 @@ class SynthesizerTrnMs768NSFsid_nono(nn.Module):
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
         logger.debug(
-            "gin_channels: " + str(gin_channels) + ", self.spk_embed_dim: " + str(self.spk_embed_dim)
+            "gin_channels: "
+            + str(gin_channels)
+            + ", self.spk_embed_dim: "
+            + str(self.spk_embed_dim)
         )
 
     def remove_weight_norm(self):
diff --git a/infer/lib/infer_pack/models_onnx.py b/infer/lib/infer_pack/models_onnx.py
index 0580924..3e99763 100644
--- a/infer/lib/infer_pack/models_onnx.py
+++ b/infer/lib/infer_pack/models_onnx.py
@@ -621,7 +621,10 @@ class SynthesizerTrnMsNSFsidM(nn.Module):
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
         self.speaker_map = None
         logger.debug(
-            "gin_channels: " + gin_channels + ", self.spk_embed_dim: " + self.spk_embed_dim
+            "gin_channels: "
+            + gin_channels
+            + ", self.spk_embed_dim: "
+            + self.spk_embed_dim
         )
 
     def remove_weight_norm(self):
diff --git a/infer/lib/train/utils.py b/infer/lib/train/utils.py
index a19c662..e26b608 100644
--- a/infer/lib/train/utils.py
+++ b/infer/lib/train/utils.py
@@ -34,8 +34,10 @@ def load_checkpoint_d(checkpoint_path, combd, sbd, optimizer=None, load_opt=1):
                 new_state_dict[k] = saved_state_dict[k]
                 if saved_state_dict[k].shape != state_dict[k].shape:
                     logger.warn(
-                        "shape-%s-mismatch. need: %s, get: %s"
-                        , k, state_dict[k].shape, saved_state_dict[k].shape
+                        "shape-%s-mismatch. need: %s, get: %s",
+                        k,
+                        state_dict[k].shape,
+                        saved_state_dict[k].shape,
                     )
                     # raise KeyError
             except:
@@ -110,8 +112,10 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
             new_state_dict[k] = saved_state_dict[k]
             if saved_state_dict[k].shape != state_dict[k].shape:
                 logger.warn(
-                    "shape-%s-mismatch|need-%s|get-%s"
-                    , k, state_dict[k].shape, saved_state_dict[k].shape
+                    "shape-%s-mismatch|need-%s|get-%s",
+                    k,
+                    state_dict[k].shape,
+                    saved_state_dict[k].shape,
                 )
                 # raise KeyError
         except:
diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py
index 774f741..d785155 100644
--- a/infer/modules/vc/modules.py
+++ b/infer/modules/vc/modules.py
@@ -215,7 +215,8 @@ class VC:
                 else "Index not used."
             )
             return (
-                "Success.\n%s\nTime:\nnpy: %.2fs, f0: %.2fs, infer: %.2fs." % (index_info, *times),
+                "Success.\n%s\nTime:\nnpy: %.2fs, f0: %.2fs, infer: %.2fs."
+                % (index_info, *times),
                 (self.tgt_sr, audio_opt),
             )
         except:
diff --git a/tools/rvc_for_realtime.py b/tools/rvc_for_realtime.py
index 5a7a44f..de15de3 100644
--- a/tools/rvc_for_realtime.py
+++ b/tools/rvc_for_realtime.py
@@ -2,6 +2,7 @@ import os
 import sys
 import traceback
 import logging
+
 logger = logging.getLogger(__name__)
 
 from time import time as ttime
@@ -47,7 +48,16 @@ if config.dml == True:
 # config.is_half=False########强制cpu测试
 class RVC:
     def __init__(
-        self, key, pth_path, index_path, index_rate, n_cpu, inp_q, opt_q, device, last_rvc=None,
+        self,
+        key,
+        pth_path,
+        index_path,
+        index_rate,
+        n_cpu,
+        inp_q,
+        opt_q,
+        device,
+        last_rvc=None,
     ) -> None:
         """
         初始化
@@ -74,7 +84,7 @@ class RVC:
             self.pth_path = pth_path
             self.index_path = index_path
             self.index_rate = index_rate
-            
+
             if last_rvc is None:
                 models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
                     ["assets/hubert/hubert_base.pt"],
@@ -90,7 +100,7 @@ class RVC:
                 self.model = hubert_model
             else:
                 self.model = last_rvc.model
-            
+
             if last_rvc is None or last_rvc.pth_path != self.pth_path:
                 cpt = torch.load(self.pth_path, map_location="cpu")
                 self.tgt_sr = cpt["config"][-1]
@@ -126,7 +136,7 @@ class RVC:
                 self.version = last_rvc.version
                 self.net_g = last_rvc.net_g
                 self.is_half = last_rvc.is_half
-            
+
             if last_rvc is not None and hasattr(last_rvc, "model_rmvpe"):
                 self.model_rmvpe = last_rvc.model_rmvpe
         except:
@@ -324,10 +334,10 @@ class RVC:
         if self.if_f0 == 1:
             pitch, pitchf = self.get_f0(indata, self.f0_up_key, self.n_cpu, f0method)
             start_frame = block_frame_16k // 160
-            end_frame = len(cache_pitch) - (pitch.shape[0] - 4) + start_frame 
-            cache_pitch[:] = np.append(cache_pitch[start_frame : end_frame], pitch[3:-1])
+            end_frame = len(cache_pitch) - (pitch.shape[0] - 4) + start_frame
+            cache_pitch[:] = np.append(cache_pitch[start_frame:end_frame], pitch[3:-1])
             cache_pitchf[:] = np.append(
-                cache_pitchf[start_frame : end_frame], pitchf[3:-1]
+                cache_pitchf[start_frame:end_frame], pitchf[3:-1]
             )
             p_len = min(feats.shape[1], 13000, cache_pitch.shape[0])
         else: