optimize: 精简未用到的配置项并在特征提取初步引入mps (#32)

This commit is contained in:
源文雨 2023-04-11 18:14:55 +08:00 committed by GitHub
parent 0656591373
commit ecc744d748
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 82 additions and 57 deletions

View File

@ -1,3 +1,20 @@
########################硬件参数########################
#填写cuda:x, cpu 或 mps, x指代第几张卡只支持 N卡 / Apple Silicon 加速
device = "cuda:0"
#9-10-20-30-40系显卡无脑True不影响质量>=20显卡开启有加速
is_half = True
#默认0用上所有线程写数字限制CPU资源使用
n_cpu = 0
########################硬件参数########################
##################下为参数处理逻辑,勿动##################
########################命令行参数########################
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--port", type=int, default=7865, help="Listen port")
@ -5,34 +22,48 @@ parser.add_argument("--pycmd", type=str, default="python", help="Python command"
parser.add_argument("--colab", action='store_true', help="Launch in colab")
parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing")
cmd_opts = parser.parse_args()
############离线VC参数
inp_root=r"白鹭霜华长条"#对输入目录下所有音频进行转换,别放非音频文件
opt_root=r"opt"#输出目录
f0_up_key=0#升降调整数男转女12女转男-12
person=r"weights\洛天依v3.pt"#目前只有洛天依v3
############硬件参数
device = "cuda:0"#填写cuda:x或cpux指代第几张卡只支持N卡加速
is_half=True#9-10-20-30-40系显卡无脑True不影响质量>=20显卡开启有加速
n_cpu=0#默认0用上所有线程写数字限制CPU资源使用
############python命令路径
python_cmd=cmd_opts.pycmd
listen_port=cmd_opts.port
iscolab=cmd_opts.colab
noparallel=cmd_opts.noparallel
############下头别动
########################命令行参数########################
import sys
import torch
if(torch.cuda.is_available()==False):
print("没有发现支持的N卡, 使用CPU进行推理")
device="cpu"
is_half=False
if(device!="cpu"):
gpu_name=torch.cuda.get_device_name(int(device.split(":")[-1]))
if("16"in gpu_name or "MX"in gpu_name):
# has_mps is only available in nightly pytorch (for now) and macOS 12.3+.
# check `getattr` and try it for compatibility
def has_mps() -> bool:
    """Report whether the Apple Metal (MPS) backend is actually usable.

    Returns:
        True only when running on macOS, PyTorch reports MPS support, and a
        small tensor can really be moved to the ``mps`` device; False in
        every other case. Never raises.
    """
    # MPS only exists on macOS; bail out early everywhere else.
    if sys.platform != "darwin":
        return False

    # Prefer the supported query (torch.backends.mps.is_available); the
    # module-level ``torch.has_mps`` flag is deprecated in newer PyTorch and
    # is kept here only as a fallback for builds that predate the new API.
    backends = getattr(torch, "backends", None)
    mps_backend = getattr(backends, "mps", None)
    is_available = getattr(mps_backend, "is_available", None)
    if callable(is_available):
        if not is_available():
            return False
    elif not getattr(torch, "has_mps", False):
        return False

    # A positive capability flag is not a guarantee (driver / OS mismatch),
    # so confirm by moving a real tensor to the device.
    try:
        torch.zeros(1).to(torch.device("mps"))
    except Exception:
        return False
    return True
if(not torch.cuda.is_available()):
if has_mps():
print("没有发现支持的N卡, 使用MPS进行推理")
device = "mps"
else:
print("没有发现支持的N卡, 使用CPU进行推理")
device = "cpu"
is_half = False
if(device not in ["cpu", "mps"]):
gpu_name = torch.cuda.get_device_name(int(device.split(":")[-1]))
if("16" in gpu_name or "MX" in gpu_name):
print("16系显卡/MX系显卡强制单精度")
is_half=False
is_half = False
from multiprocessing import cpu_count
if(n_cpu==0):n_cpu=cpu_count()
if(is_half==True):
if(n_cpu==0): n_cpu=cpu_count()
if(is_half):
#6G显存配置
x_pad = 3
x_query = 10
@ -41,10 +72,6 @@ if(is_half==True):
else:
#5G显存配置
x_pad = 1
# x_query = 6
# x_center = 30
# x_max = 32
#6G显存配置
x_query = 6
x_center = 38
x_max = 41

View File

@ -1,13 +1,12 @@
import os,sys,traceback
if len(sys.argv) == 4:
n_part=int(sys.argv[1])
i_part=int(sys.argv[2])
exp_dir=sys.argv[3]
else:
n_part=int(sys.argv[1])
i_part=int(sys.argv[2])
i_gpu=sys.argv[3]
device=sys.argv[1]
n_part=int(sys.argv[2])
i_part=int(sys.argv[3])
if len(sys.argv) == 5:
exp_dir=sys.argv[4]
else:
i_gpu=sys.argv[4]
exp_dir=sys.argv[5]
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
import torch
@ -15,7 +14,6 @@ import torch.nn.functional as F
import soundfile as sf
import numpy as np
from fairseq import checkpoint_utils
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
f = open("%s/extract_f0_feature.log"%exp_dir, "a+")
def printt(strr):
@ -50,8 +48,8 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
)
model = models[0]
model = model.to(device)
if torch.cuda.is_available():
model = model.half()
printt("move model to "+device)
if device != "cpu": model = model.half()
model.eval()
todo=sorted(list(os.listdir(wavPath)))[i_part::n_part]
@ -70,7 +68,7 @@ else:
feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
padding_mask = torch.BoolTensor(feats.shape).fill_(False)
inputs = {
"source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device),
"source": feats.half().to(device) if device != "cpu" else feats.to(device),
"padding_mask": padding_mask.to(device),
"output_layer": 9, # layer 9
}

View File

@ -36,7 +36,7 @@ from fairseq import checkpoint_utils
import gradio as gr
import logging
from vc_infer_pipeline import VC
from config import is_half,device,is_half,python_cmd,listen_port,iscolab,noparallel
from config import is_half,device,python_cmd,listen_port,iscolab,noparallel
from infer_uvr5 import _audio_pre_
from my_utils import load_audio
from train.process_ckpt import show_info,change_info,merge,extract_small_model
@ -53,7 +53,7 @@ class ToolButton(gr.Button, gr.components.FormComponent):
hubert_model=None
def load_hubert():
global hubert_model
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"],suffix="",)
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"],suffix="",)
hubert_model = models[0]
hubert_model = hubert_model.to(device)
if(is_half):hubert_model = hubert_model.half()
@ -79,7 +79,7 @@ def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_np
if(hubert_model==None):load_hubert()
if_f0 = cpt.get("f0", 1)
audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file)
print("npy: ", times[0], "s, f0:", times[1], "s, infer: ", times[2], "s", sep='')
print("npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep='')
return "Success", (tgt_sr, audio_opt)
except:
info=traceback.format_exc()
@ -267,7 +267,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir):
leng=len(gpus)
ps=[]
for idx,n_g in enumerate(gpus):
cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
cmd=python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s"%(device,leng,idx,n_g,now_dir,exp_dir)
print(cmd)
p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
ps.append(p)
@ -382,7 +382,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
leng=len(gpus)
ps=[]
for idx,n_g in enumerate(gpus):
cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
cmd=python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s"%(device,leng,idx,n_g,now_dir,exp_dir1)
yield get_info_str(cmd)
p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
ps.append(p)

View File

@ -345,7 +345,7 @@ class SourceModuleHnNSF(torch.nn.Module):
def forward(self, x,upp=None):
sine_wavs, uv, _ = self.l_sin_gen(x,upp)
if(self.is_half==True):sine_wavs=sine_wavs.half()
if(self.is_half):sine_wavs=sine_wavs.half()
sine_merge = self.l_tanh(self.l_linear(sine_wavs))
return sine_merge,None,None# noise, uv
class GeneratorNSF(torch.nn.Module):

View File

@ -345,7 +345,7 @@ class SourceModuleHnNSF(torch.nn.Module):
def forward(self, x,upp=None):
sine_wavs, uv, _ = self.l_sin_gen(x,upp)
if(self.is_half==True):sine_wavs=sine_wavs.half()
if(self.is_half):sine_wavs=sine_wavs.half()
sine_merge = self.l_tanh(self.l_linear(sine_wavs))
return sine_merge,None,None# noise, uv
class GeneratorNSF(torch.nn.Module):

View File

@ -39,7 +39,7 @@ class _audio_pre_():
cpk = torch.load( model_path , map_location='cpu')
model.load_state_dict(cpk)
model.eval()
if(is_half==True):model = model.half().to(device)
if(is_half):model = model.half().to(device)
else:model = model.to(device)
self.mp = mp

View File

@ -182,4 +182,4 @@ def main():
if __name__ == '__main__':
main()
main()

View File

@ -40,7 +40,7 @@ class PreProcess():
os.makedirs(self.gt_wavs_dir,exist_ok=True)
os.makedirs(self.wavs16k_dir,exist_ok=True)
def print(self, strr):
def println(self, strr):
mutex.acquire()
print(strr)
self.f.write("%s\n" % strr)
@ -70,9 +70,9 @@ class PreProcess():
tmp_audio = audio[start:]
break
self.norm_write(tmp_audio, idx0, idx1)
self.print("%s->Suc."%path)
self.println("%s->Suc."%path)
except:
self.print("%s->%s"%(path,traceback.format_exc()))
self.println("%s->%s"%(path,traceback.format_exc()))
def pipeline_mp(self,infos):
for path, idx0 in infos:
@ -91,14 +91,14 @@ class PreProcess():
ps.append(p)
for p in ps:p.join()
except:
self.print("Fail. %s"%traceback.format_exc())
self.println("Fail. %s"%traceback.format_exc())
def preprocess_trainset(inp_root, sr, n_p, exp_dir):
pp=PreProcess(sr,exp_dir)
pp.print("start preprocess")
pp.print(sys.argv)
pp.println("start preprocess")
pp.println(sys.argv)
pp.pipeline_mp_inp_dir(inp_root,n_p)
pp.print("end preprocess")
pp.println("end preprocess")
if __name__=='__main__':
preprocess_trainset(inp_root, sr, n_p, exp_dir)

View File

@ -27,7 +27,7 @@ def inference(X_spec, device, model, aggressiveness,data):
start = i * roi_size
X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
X_mag_window = torch.from_numpy(X_mag_window)
if(is_half==True):X_mag_window=X_mag_window.half()
if(is_half):X_mag_window=X_mag_window.half()
X_mag_window=X_mag_window.to(device)
pred = model.predict(X_mag_window, aggressiveness)

View File

@ -58,7 +58,7 @@ class VC(object):
def vc(self,model,net_g,sid,audio0,pitch,pitchf,times,index,big_npy,index_rate):#,file_index,file_big_npy
feats = torch.from_numpy(audio0)
if(self.is_half==True):feats=feats.half()
if(self.is_half):feats=feats.half()
else:feats=feats.float()
if feats.dim() == 2: # double channels
feats = feats.mean(-1)
@ -78,10 +78,10 @@ class VC(object):
if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0):
npy = feats[0].cpu().numpy()
if(self.is_half==True):npy=npy.astype("float32")
if(self.is_half):npy=npy.astype("float32")
_, I = index.search(npy, 1)
npy=big_npy[I.squeeze()]
if(self.is_half==True):npy=npy.astype("float16")
if(self.is_half):npy=npy.astype("float16")
feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)