mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-01-01 12:35:04 +08:00
fix: MacOS 纯 CPU 推理时 Segmentation fault: 11
see: facebookresearch/faiss#2317 facebookresearch/faiss#2410
This commit is contained in:
parent
6c7c1d933f
commit
ff1a711cad
@ -129,10 +129,10 @@
|
|||||||
"#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
|
"#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#@markdown 数据集位置\n",
|
"#@markdown 数据集位置\n",
|
||||||
"DATASET = \"/content/drive/MyDrive/dataset/lulucall_48k.zip\" #@param {type:\"string\"}\n",
|
"DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" #@param {type:\"string\"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"!mkdir -p /content/dataset\n",
|
"!mkdir -p /content/dataset\n",
|
||||||
"!unzip -d /content/dataset {DATASET}"
|
"!unzip -d /content/dataset -B {DATASET}"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "Mwk7Q0Loqzjx"
|
"id": "Mwk7Q0Loqzjx"
|
||||||
@ -140,13 +140,26 @@
|
|||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"#@title 重命名数据集中的重名文件\n",
|
||||||
|
"!ls -a /content/dataset/\n",
|
||||||
|
"!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "PDlFxWHWEynD"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"source": [
|
"source": [
|
||||||
"#@title 启动web\n",
|
"#@title 启动web\n",
|
||||||
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
|
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
|
||||||
"%load_ext tensorboard\n",
|
"# %load_ext tensorboard\n",
|
||||||
"%tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
|
"# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
|
||||||
"!python3 infer-web.py --colab --pycmd python3"
|
"!python3 infer-web.py --colab --pycmd python3"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -164,7 +177,7 @@
|
|||||||
"#@markdown 模型名\n",
|
"#@markdown 模型名\n",
|
||||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||||
"#@markdown 模型epoch\n",
|
"#@markdown 模型epoch\n",
|
||||||
"MODELEPOCH = 7500 #@param {type:\"integer\"}\n",
|
"MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
||||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
|
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
|
||||||
@ -188,7 +201,7 @@
|
|||||||
"#@markdown 模型名\n",
|
"#@markdown 模型名\n",
|
||||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||||
"#@markdown 模型epoch\n",
|
"#@markdown 模型epoch\n",
|
||||||
"MODELEPOCH = 6000 #@param {type:\"integer\"}\n",
|
"MODELEPOCH = 7500 #@param {type:\"integer\"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
|
"!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -241,7 +254,7 @@
|
|||||||
"#@markdown 模型名\n",
|
"#@markdown 模型名\n",
|
||||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||||
"#@markdown 停止的epoch\n",
|
"#@markdown 停止的epoch\n",
|
||||||
"MODELEPOCH = 2500 #@param {type:\"integer\"}\n",
|
"MODELEPOCH = 3200 #@param {type:\"integer\"}\n",
|
||||||
"#@markdown 保存epoch间隔\n",
|
"#@markdown 保存epoch间隔\n",
|
||||||
"EPOCHSAVE = 100 #@param {type:\"integer\"}\n",
|
"EPOCHSAVE = 100 #@param {type:\"integer\"}\n",
|
||||||
"#@markdown 采样率\n",
|
"#@markdown 采样率\n",
|
||||||
@ -262,7 +275,7 @@
|
|||||||
"#@markdown 模型名\n",
|
"#@markdown 模型名\n",
|
||||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||||
"#@markdown 选中模型epoch\n",
|
"#@markdown 选中模型epoch\n",
|
||||||
"MODELEPOCH = 7700 #@param {type:\"integer\"}\n",
|
"MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"!echo \"备份选中的模型。。。\"\n",
|
"!echo \"备份选中的模型。。。\"\n",
|
||||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
||||||
@ -292,7 +305,7 @@
|
|||||||
"#@markdown 模型名\n",
|
"#@markdown 模型名\n",
|
||||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||||
"#@markdown 选中模型epoch\n",
|
"#@markdown 选中模型epoch\n",
|
||||||
"MODELEPOCH = 7700 #@param {type:\"integer\"}\n",
|
"MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"!echo \"备份选中的模型。。。\"\n",
|
"!echo \"备份选中的模型。。。\"\n",
|
||||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
||||||
|
@ -9,7 +9,7 @@ import faiss
|
|||||||
ncpu=cpu_count()
|
ncpu=cpu_count()
|
||||||
ngpu=torch.cuda.device_count()
|
ngpu=torch.cuda.device_count()
|
||||||
gpu_infos=[]
|
gpu_infos=[]
|
||||||
if(torch.cuda.is_available()==False or ngpu==0):if_gpu_ok=False
|
if((not torch.cuda.is_available()) or ngpu==0):if_gpu_ok=False
|
||||||
else:
|
else:
|
||||||
if_gpu_ok = False
|
if_gpu_ok = False
|
||||||
for i in range(ngpu):
|
for i in range(ngpu):
|
||||||
@ -140,7 +140,7 @@ def uvr(model_name,inp_root,save_root_vocal,paths,save_root_ins):
|
|||||||
except:
|
except:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
print("clean_empty_cache")
|
print("clean_empty_cache")
|
||||||
torch.cuda.empty_cache()
|
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||||
yield "\n".join(infos)
|
yield "\n".join(infos)
|
||||||
|
|
||||||
#一个选项卡全局只能有一个音色
|
#一个选项卡全局只能有一个音色
|
||||||
@ -152,7 +152,7 @@ def get_vc(sid):
|
|||||||
print("clean_empty_cache")
|
print("clean_empty_cache")
|
||||||
del net_g, n_spk, vc, hubert_model,tgt_sr#,cpt
|
del net_g, n_spk, vc, hubert_model,tgt_sr#,cpt
|
||||||
hubert_model = net_g=n_spk=vc=hubert_model=tgt_sr=None
|
hubert_model = net_g=n_spk=vc=hubert_model=tgt_sr=None
|
||||||
torch.cuda.empty_cache()
|
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||||
###楼下不这么折腾清理不干净
|
###楼下不这么折腾清理不干净
|
||||||
if_f0 = cpt.get("f0", 1)
|
if_f0 = cpt.get("f0", 1)
|
||||||
if (if_f0 == 1):
|
if (if_f0 == 1):
|
||||||
@ -160,7 +160,7 @@ def get_vc(sid):
|
|||||||
else:
|
else:
|
||||||
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
||||||
del net_g,cpt
|
del net_g,cpt
|
||||||
torch.cuda.empty_cache()
|
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||||
cpt=None
|
cpt=None
|
||||||
return {"visible": False, "__type__": "update"}
|
return {"visible": False, "__type__": "update"}
|
||||||
person = "%s/%s" % (weight_root, sid)
|
person = "%s/%s" % (weight_root, sid)
|
||||||
|
@ -104,7 +104,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
|
|||||||
"padding_mask": padding_mask.to(device),
|
"padding_mask": padding_mask.to(device),
|
||||||
"output_layer": 9, # layer 9
|
"output_layer": 9, # layer 9
|
||||||
}
|
}
|
||||||
torch.cuda.synchronize()
|
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||||
t0=ttime()
|
t0=ttime()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
logits = model.extract_features(**inputs)
|
logits = model.extract_features(**inputs)
|
||||||
@ -116,13 +116,13 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
|
|||||||
feats = torch.from_numpy(big_npy[I.squeeze()].astype("float16")).unsqueeze(0).to(device)
|
feats = torch.from_numpy(big_npy[I.squeeze()].astype("float16")).unsqueeze(0).to(device)
|
||||||
|
|
||||||
feats=F.interpolate(feats.permute(0,2,1),scale_factor=2).permute(0,2,1)
|
feats=F.interpolate(feats.permute(0,2,1),scale_factor=2).permute(0,2,1)
|
||||||
torch.cuda.synchronize()
|
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||||
t1=ttime()
|
t1=ttime()
|
||||||
# p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
|
# p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
|
||||||
p_len = min(feats.shape[1],10000)#
|
p_len = min(feats.shape[1],10000)#
|
||||||
pitch, pitchf = get_f0(audio, p_len,f0_up_key)
|
pitch, pitchf = get_f0(audio, p_len,f0_up_key)
|
||||||
p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
|
p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
|
||||||
torch.cuda.synchronize()
|
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||||
t2=ttime()
|
t2=ttime()
|
||||||
feats = feats[:,:p_len, :]
|
feats = feats[:,:p_len, :]
|
||||||
pitch = pitch[:p_len]
|
pitch = pitch[:p_len]
|
||||||
@ -133,7 +133,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
|
|||||||
pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
|
pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
audio = net_g.infer(feats, p_len,pitch,pitchf,sid)[0][0, 0].data.cpu().float().numpy()#nsf
|
audio = net_g.infer(feats, p_len,pitch,pitchf,sid)[0][0, 0].data.cpu().float().numpy()#nsf
|
||||||
torch.cuda.synchronize()
|
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||||
t3=ttime()
|
t3=ttime()
|
||||||
ta0+=(t1-t0)
|
ta0+=(t1-t0)
|
||||||
ta1+=(t2-t1)
|
ta1+=(t2-t1)
|
||||||
|
@ -4,7 +4,7 @@ scipy==1.9.3
|
|||||||
librosa==0.9.2
|
librosa==0.9.2
|
||||||
llvmlite==0.39.0
|
llvmlite==0.39.0
|
||||||
fairseq==0.12.2
|
fairseq==0.12.2
|
||||||
faiss-cpu==1.7.2
|
faiss-cpu==1.7.0
|
||||||
gradio
|
gradio
|
||||||
Cython
|
Cython
|
||||||
future>=0.18.3
|
future>=0.18.3
|
||||||
|
@ -72,6 +72,7 @@ class VC(object):
|
|||||||
"output_layer": 9, # layer 9
|
"output_layer": 9, # layer 9
|
||||||
}
|
}
|
||||||
t0 = ttime()
|
t0 = ttime()
|
||||||
|
print("vc npy start time:", t0)
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
logits = model.extract_features(**inputs)
|
logits = model.extract_features(**inputs)
|
||||||
feats = model.final_proj(logits[0])
|
feats = model.final_proj(logits[0])
|
||||||
@ -79,13 +80,14 @@ class VC(object):
|
|||||||
if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0):
|
if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0):
|
||||||
npy = feats[0].cpu().numpy()
|
npy = feats[0].cpu().numpy()
|
||||||
if(self.is_half==True):npy=npy.astype("float32")
|
if(self.is_half==True):npy=npy.astype("float32")
|
||||||
D, I = index.search(npy, 1)
|
_, I = index.search(npy, 1)
|
||||||
npy=big_npy[I.squeeze()]
|
npy=big_npy[I.squeeze()]
|
||||||
if(self.is_half==True):npy=npy.astype("float16")
|
if(self.is_half==True):npy=npy.astype("float16")
|
||||||
feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats
|
feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats
|
||||||
|
|
||||||
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
|
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
|
||||||
t1 = ttime()
|
t1 = ttime()
|
||||||
|
print("vc infer start time:", t1)
|
||||||
p_len = audio0.shape[0]//self.window
|
p_len = audio0.shape[0]//self.window
|
||||||
if(feats.shape[1]<p_len):
|
if(feats.shape[1]<p_len):
|
||||||
p_len=feats.shape[1]
|
p_len=feats.shape[1]
|
||||||
@ -99,8 +101,9 @@ class VC(object):
|
|||||||
else:
|
else:
|
||||||
audio1 = (net_g.infer(feats, p_len, sid)[0][0, 0] * 32768).data.cpu().float().numpy().astype(np.int16)
|
audio1 = (net_g.infer(feats, p_len, sid)[0][0, 0] * 32768).data.cpu().float().numpy().astype(np.int16)
|
||||||
del feats,p_len,padding_mask
|
del feats,p_len,padding_mask
|
||||||
torch.cuda.empty_cache()
|
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||||
t2 = ttime()
|
t2 = ttime()
|
||||||
|
print("vc infer end time:", t2)
|
||||||
times[0] += (t1 - t0)
|
times[0] += (t1 - t0)
|
||||||
times[2] += (t2 - t1)
|
times[2] += (t2 - t1)
|
||||||
return audio1
|
return audio1
|
||||||
@ -125,6 +128,7 @@ class VC(object):
|
|||||||
audio_opt=[]
|
audio_opt=[]
|
||||||
t=None
|
t=None
|
||||||
t1=ttime()
|
t1=ttime()
|
||||||
|
print("f0 start time:", t1)
|
||||||
audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode='reflect')
|
audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode='reflect')
|
||||||
p_len=audio_pad.shape[0]//self.window
|
p_len=audio_pad.shape[0]//self.window
|
||||||
inp_f0=None
|
inp_f0=None
|
||||||
@ -146,6 +150,7 @@ class VC(object):
|
|||||||
pitch = torch.tensor(pitch,device=self.device).unsqueeze(0).long()
|
pitch = torch.tensor(pitch,device=self.device).unsqueeze(0).long()
|
||||||
pitchf = torch.tensor(pitchf,device=self.device).unsqueeze(0).float()
|
pitchf = torch.tensor(pitchf,device=self.device).unsqueeze(0).float()
|
||||||
t2=ttime()
|
t2=ttime()
|
||||||
|
print("f0 end time:", t2)
|
||||||
times[1] += (t2 - t1)
|
times[1] += (t2 - t1)
|
||||||
for t in opt_ts:
|
for t in opt_ts:
|
||||||
t=t//self.window*self.window
|
t=t//self.window*self.window
|
||||||
@ -160,5 +165,5 @@ class VC(object):
|
|||||||
audio_opt.append(self.vc(model,net_g,sid,audio_pad[t:],None,None,times,index,big_npy,index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
|
audio_opt.append(self.vc(model,net_g,sid,audio_pad[t:],None,None,times,index,big_npy,index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
|
||||||
audio_opt=np.concatenate(audio_opt)
|
audio_opt=np.concatenate(audio_opt)
|
||||||
del pitch,pitchf,sid
|
del pitch,pitchf,sid
|
||||||
torch.cuda.empty_cache()
|
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||||
return audio_opt
|
return audio_opt
|
||||||
|
Loading…
Reference in New Issue
Block a user